author     C.J. Collier <cjcollier@linuxfoundation.org>  2016-06-14 07:50:17 -0700
committer  C.J. Collier <cjcollier@linuxfoundation.org>  2016-06-14 12:17:54 -0700
commit     97f17497d162afdb82c8704bf097f0fee3724b2e (patch)
tree       1c6269614c0c15ffef8451c58ae8f8b30a1bc804 /examples
parent     e04be89c2409570e0055b2cda60bd11395bb93b0 (diff)

Imported Upstream version 16.04

Change-Id: I77eadcd8538a9122e4773cbe55b24033dc451757
Signed-off-by: C.J. Collier <cjcollier@linuxfoundation.org>
Diffstat (limited to 'examples')
-rw-r--r-- examples/Makefile | 98
-rw-r--r-- examples/bond/Makefile | 57
-rw-r--r-- examples/bond/main.c | 786
-rw-r--r-- examples/bond/main.h | 39
-rw-r--r-- examples/cmdline/Makefile | 51
-rw-r--r-- examples/cmdline/commands.c | 283
-rw-r--r-- examples/cmdline/commands.h | 39
-rw-r--r-- examples/cmdline/main.c | 96
-rw-r--r-- examples/cmdline/parse_obj_list.c | 166
-rw-r--r-- examples/cmdline/parse_obj_list.h | 112
-rw-r--r-- examples/distributor/Makefile | 57
-rw-r--r-- examples/distributor/main.c | 600
-rw-r--r-- examples/dpdk_qat/Makefile | 93
-rw-r--r-- examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf | 65
-rw-r--r-- examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf | 293
-rw-r--r-- examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf | 292
-rw-r--r-- examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf | 235
-rw-r--r-- examples/dpdk_qat/crypto.c | 944
-rw-r--r-- examples/dpdk_qat/crypto.h | 90
-rw-r--r-- examples/dpdk_qat/main.c | 824
-rw-r--r-- examples/ethtool/Makefile | 49
-rw-r--r-- examples/ethtool/ethtool-app/Makefile | 54
-rw-r--r-- examples/ethtool/ethtool-app/ethapp.c | 873
-rw-r--r-- examples/ethtool/ethtool-app/ethapp.h | 41
-rw-r--r-- examples/ethtool/ethtool-app/main.c | 305
-rw-r--r-- examples/ethtool/lib/Makefile | 57
-rw-r--r-- examples/ethtool/lib/rte_ethtool.c | 423
-rw-r--r-- examples/ethtool/lib/rte_ethtool.h | 410
-rw-r--r-- examples/exception_path/Makefile | 58
-rw-r--r-- examples/exception_path/main.c | 571
-rw-r--r-- examples/helloworld/Makefile | 50
-rw-r--r-- examples/helloworld/main.c | 77
-rw-r--r-- examples/ip_fragmentation/Makefile | 57
-rw-r--r-- examples/ip_fragmentation/main.c | 965
-rw-r--r-- examples/ip_pipeline/Makefile | 79
-rw-r--r-- examples/ip_pipeline/app.h | 949
-rw-r--r-- examples/ip_pipeline/config/edge_router_downstream.cfg | 85
-rw-r--r-- examples/ip_pipeline/config/edge_router_downstream.sh | 10
-rw-r--r-- examples/ip_pipeline/config/edge_router_upstream.cfg | 110
-rw-r--r-- examples/ip_pipeline/config/edge_router_upstream.sh | 38
-rw-r--r-- examples/ip_pipeline/config/ip_pipeline.cfg | 9
-rw-r--r-- examples/ip_pipeline/config/ip_pipeline.sh | 5
-rw-r--r-- examples/ip_pipeline/config/l2fwd.cfg | 55
-rw-r--r-- examples/ip_pipeline/config/l3fwd.cfg | 63
-rw-r--r-- examples/ip_pipeline/config/l3fwd.sh | 9
-rw-r--r-- examples/ip_pipeline/config/tm_profile.cfg | 105
-rw-r--r-- examples/ip_pipeline/config_check.c | 444
-rw-r--r-- examples/ip_pipeline/config_parse.c | 3383
-rw-r--r-- examples/ip_pipeline/config_parse_tm.c | 448
-rw-r--r-- examples/ip_pipeline/cpu_core_map.c | 492
-rw-r--r-- examples/ip_pipeline/cpu_core_map.h | 69
-rw-r--r-- examples/ip_pipeline/init.c | 1637
-rw-r--r-- examples/ip_pipeline/main.c | 64
-rw-r--r-- examples/ip_pipeline/parser.h | 50
-rw-r--r-- examples/ip_pipeline/pipeline.h | 93
-rw-r--r-- examples/ip_pipeline/pipeline/hash_func.h | 351
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_actions_common.h | 231
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_common_be.c | 206
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_common_be.h | 163
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_common_fe.c | 1310
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_common_fe.h | 234
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_firewall.c | 1869
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_firewall.h | 77
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_firewall_be.c | 907
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_firewall_be.h | 176
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_actions.c | 1814
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_actions.h | 78
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c | 1011
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h | 168
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_classification.c | 2215
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_classification.h | 107
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c | 811
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h | 142
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_master.c | 47
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_master.h | 41
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_master_be.c | 150
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_master_be.h | 41
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_passthrough.c | 47
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_passthrough.h | 41
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_passthrough_be.c | 804
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_passthrough_be.h | 59
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_routing.c | 2239
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_routing.h | 93
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_routing_be.c | 1970
-rw-r--r-- examples/ip_pipeline/pipeline/pipeline_routing_be.h | 296
-rw-r--r-- examples/ip_pipeline/pipeline_be.h | 305
-rw-r--r-- examples/ip_pipeline/thread.c | 322
-rw-r--r-- examples/ip_pipeline/thread.h | 98
-rw-r--r-- examples/ip_pipeline/thread_fe.c | 461
-rw-r--r-- examples/ip_pipeline/thread_fe.h | 101
-rw-r--r-- examples/ip_reassembly/Makefile | 57
-rw-r--r-- examples/ip_reassembly/main.c | 1185
-rw-r--r-- examples/ipsec-secgw/Makefile | 62
-rw-r--r-- examples/ipsec-secgw/esp.c | 251
-rw-r--r-- examples/ipsec-secgw/esp.h | 66
-rw-r--r-- examples/ipsec-secgw/ipip.h | 103
-rw-r--r-- examples/ipsec-secgw/ipsec-secgw.c | 1362
-rw-r--r-- examples/ipsec-secgw/ipsec.c | 203
-rw-r--r-- examples/ipsec-secgw/ipsec.h | 190
-rw-r--r-- examples/ipsec-secgw/rt.c | 145
-rw-r--r-- examples/ipsec-secgw/sa.c | 446
-rw-r--r-- examples/ipsec-secgw/sp.c | 366
-rw-r--r-- examples/ipv4_multicast/Makefile | 57
-rw-r--r-- examples/ipv4_multicast/main.c | 819
-rw-r--r-- examples/kni/Makefile | 55
-rw-r--r-- examples/kni/main.c | 928
-rw-r--r-- examples/l2fwd-cat/Makefile | 70
-rw-r--r-- examples/l2fwd-cat/cat.c | 996
-rw-r--r-- examples/l2fwd-cat/cat.h | 72
-rw-r--r-- examples/l2fwd-cat/l2fwd-cat.c | 224
-rw-r--r-- examples/l2fwd-crypto/Makefile | 50
-rw-r--r-- examples/l2fwd-crypto/main.c | 2056
-rw-r--r-- examples/l2fwd-ivshmem/Makefile | 43
-rw-r--r-- examples/l2fwd-ivshmem/guest/Makefile | 50
-rw-r--r-- examples/l2fwd-ivshmem/guest/guest.c | 452
-rw-r--r-- examples/l2fwd-ivshmem/host/Makefile | 50
-rw-r--r-- examples/l2fwd-ivshmem/host/host.c | 897
-rw-r--r-- examples/l2fwd-ivshmem/include/common.h | 111
-rw-r--r-- examples/l2fwd-jobstats/Makefile | 51
-rw-r--r-- examples/l2fwd-jobstats/main.c | 1026
-rw-r--r-- examples/l2fwd-keepalive/Makefile | 50
-rw-r--r-- examples/l2fwd-keepalive/main.c | 782
-rw-r--r-- examples/l2fwd/Makefile | 50
-rw-r--r-- examples/l2fwd/main.c | 720
-rw-r--r-- examples/l3fwd-acl/Makefile | 56
-rw-r--r-- examples/l3fwd-acl/main.c | 2079
-rw-r--r-- examples/l3fwd-power/Makefile | 63
-rw-r--r-- examples/l3fwd-power/main.c | 1760
-rw-r--r-- examples/l3fwd-vf/Makefile | 56
-rw-r--r-- examples/l3fwd-vf/main.c | 1097
-rw-r--r-- examples/l3fwd/Makefile | 51
-rw-r--r-- examples/l3fwd/l3fwd.h | 241
-rw-r--r-- examples/l3fwd/l3fwd_em.c | 801
-rw-r--r-- examples/l3fwd/l3fwd_em.h | 138
-rw-r--r-- examples/l3fwd/l3fwd_em_hlm_sse.h | 342
-rw-r--r-- examples/l3fwd/l3fwd_em_sse.h | 112
-rw-r--r-- examples/l3fwd/l3fwd_lpm.c | 356
-rw-r--r-- examples/l3fwd/l3fwd_lpm.h | 151
-rw-r--r-- examples/l3fwd/l3fwd_lpm_sse.h | 213
-rw-r--r-- examples/l3fwd/l3fwd_sse.h | 501
-rw-r--r-- examples/l3fwd/main.c | 1055
-rw-r--r-- examples/link_status_interrupt/Makefile | 50
-rw-r--r-- examples/link_status_interrupt/main.c | 732
-rw-r--r-- examples/load_balancer/Makefile | 57
-rw-r--r-- examples/load_balancer/config.c | 1063
-rw-r--r-- examples/load_balancer/init.c | 521
-rw-r--r-- examples/load_balancer/main.c | 109
-rw-r--r-- examples/load_balancer/main.h | 371
-rw-r--r-- examples/load_balancer/runtime.c | 668
-rw-r--r-- examples/multi_process/Makefile | 45
-rw-r--r-- examples/multi_process/client_server_mp/Makefile | 44
-rw-r--r-- examples/multi_process/client_server_mp/mp_client/Makefile | 48
-rw-r--r-- examples/multi_process/client_server_mp/mp_client/client.c | 305
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/Makefile | 61
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/args.c | 172
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/args.h | 39
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/init.c | 305
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/init.h | 72
-rw-r--r-- examples/multi_process/client_server_mp/mp_server/main.c | 319
-rw-r--r-- examples/multi_process/client_server_mp/shared/common.h | 87
-rw-r--r-- examples/multi_process/l2fwd_fork/Makefile | 50
-rw-r--r-- examples/multi_process/l2fwd_fork/flib.c | 313
-rw-r--r-- examples/multi_process/l2fwd_fork/flib.h | 149
-rw-r--r-- examples/multi_process/l2fwd_fork/main.c | 1288
-rw-r--r-- examples/multi_process/simple_mp/Makefile | 50
-rw-r--r-- examples/multi_process/simple_mp/main.c | 155
-rw-r--r-- examples/multi_process/simple_mp/mp_commands.c | 166
-rw-r--r-- examples/multi_process/simple_mp/mp_commands.h | 44
-rw-r--r-- examples/multi_process/symmetric_mp/Makefile | 50
-rw-r--r-- examples/multi_process/symmetric_mp/main.c | 472
-rw-r--r-- examples/netmap_compat/Makefile | 50
-rw-r--r-- examples/netmap_compat/bridge/Makefile | 62
-rw-r--r-- examples/netmap_compat/bridge/bridge.c | 377
-rw-r--r-- examples/netmap_compat/lib/compat_netmap.c | 908
-rw-r--r-- examples/netmap_compat/lib/compat_netmap.h | 80
-rw-r--r-- examples/netmap_compat/netmap/netmap.h | 289
-rw-r--r-- examples/netmap_compat/netmap/netmap_user.h | 95
-rw-r--r-- examples/packet_ordering/Makefile | 50
-rw-r--r-- examples/packet_ordering/main.c | 756
-rw-r--r-- examples/performance-thread/Makefile | 49
-rw-r--r-- examples/performance-thread/common/arch/x86/ctx.c | 93
-rw-r--r-- examples/performance-thread/common/arch/x86/ctx.h | 57
-rw-r--r-- examples/performance-thread/common/common.mk | 42
-rw-r--r-- examples/performance-thread/common/lthread.c | 529
-rw-r--r-- examples/performance-thread/common/lthread.h | 99
-rw-r--r-- examples/performance-thread/common/lthread_api.h | 832
-rw-r--r-- examples/performance-thread/common/lthread_cond.c | 239
-rw-r--r-- examples/performance-thread/common/lthread_cond.h | 77
-rw-r--r-- examples/performance-thread/common/lthread_diag.c | 323
-rw-r--r-- examples/performance-thread/common/lthread_diag.h | 132
-rw-r--r-- examples/performance-thread/common/lthread_diag_api.h | 325
-rw-r--r-- examples/performance-thread/common/lthread_int.h | 212
-rw-r--r-- examples/performance-thread/common/lthread_mutex.c | 254
-rw-r--r-- examples/performance-thread/common/lthread_mutex.h | 52
-rw-r--r-- examples/performance-thread/common/lthread_objcache.h | 158
-rw-r--r-- examples/performance-thread/common/lthread_pool.h | 332
-rw-r--r-- examples/performance-thread/common/lthread_queue.h | 302
-rw-r--r-- examples/performance-thread/common/lthread_sched.c | 599
-rw-r--r-- examples/performance-thread/common/lthread_sched.h | 152
-rw-r--r-- examples/performance-thread/common/lthread_timer.h | 79
-rw-r--r-- examples/performance-thread/common/lthread_tls.c | 253
-rw-r--r-- examples/performance-thread/common/lthread_tls.h | 57
-rw-r--r-- examples/performance-thread/l3fwd-thread/Makefile | 57
-rw-r--r-- examples/performance-thread/l3fwd-thread/main.c | 3651
-rwxr-xr-x examples/performance-thread/l3fwd-thread/test.sh | 149
-rw-r--r-- examples/performance-thread/pthread_shim/Makefile | 60
-rw-r--r-- examples/performance-thread/pthread_shim/main.c | 287
-rw-r--r-- examples/performance-thread/pthread_shim/pthread_shim.c | 719
-rw-r--r-- examples/performance-thread/pthread_shim/pthread_shim.h | 113
-rw-r--r-- examples/ptpclient/Makefile | 57
-rw-r--r-- examples/ptpclient/ptpclient.c | 780
-rw-r--r-- examples/qos_meter/Makefile | 56
-rw-r--r-- examples/qos_meter/main.c | 394
-rw-r--r-- examples/qos_meter/main.h | 93
-rw-r--r-- examples/qos_meter/rte_policer.c | 58
-rw-r--r-- examples/qos_meter/rte_policer.h | 64
-rw-r--r-- examples/qos_sched/Makefile | 60
-rw-r--r-- examples/qos_sched/app_thread.c | 293
-rw-r--r-- examples/qos_sched/args.c | 485
-rw-r--r-- examples/qos_sched/cfg_file.c | 342
-rw-r--r-- examples/qos_sched/cfg_file.h | 46
-rw-r--r-- examples/qos_sched/cmdline.c | 643
-rw-r--r-- examples/qos_sched/init.c | 370
-rw-r--r-- examples/qos_sched/main.c | 254
-rw-r--r-- examples/qos_sched/main.h | 195
-rw-r--r-- examples/qos_sched/profile.cfg | 104
-rw-r--r-- examples/qos_sched/profile_ov.cfg | 90
-rw-r--r-- examples/qos_sched/stats.c | 315
-rw-r--r-- examples/quota_watermark/Makefile | 44
-rw-r--r-- examples/quota_watermark/include/conf.h | 48
-rw-r--r-- examples/quota_watermark/qw/Makefile | 50
-rw-r--r-- examples/quota_watermark/qw/args.c | 104
-rw-r--r-- examples/quota_watermark/qw/args.h | 41
-rw-r--r-- examples/quota_watermark/qw/init.c | 174
-rw-r--r-- examples/quota_watermark/qw/init.h | 43
-rw-r--r-- examples/quota_watermark/qw/main.c | 372
-rw-r--r-- examples/quota_watermark/qw/main.h | 59
-rw-r--r-- examples/quota_watermark/qwctl/Makefile | 50
-rw-r--r-- examples/quota_watermark/qwctl/commands.c | 217
-rw-r--r-- examples/quota_watermark/qwctl/commands.h | 41
-rw-r--r-- examples/quota_watermark/qwctl/qwctl.c | 95
-rw-r--r-- examples/quota_watermark/qwctl/qwctl.h | 40
-rw-r--r-- examples/rxtx_callbacks/Makefile | 57
-rw-r--r-- examples/rxtx_callbacks/main.c | 225
-rw-r--r-- examples/skeleton/Makefile | 57
-rw-r--r-- examples/skeleton/basicfwd.c | 211
-rw-r--r-- examples/tep_termination/Makefile | 56
-rw-r--r-- examples/tep_termination/main.c | 1275
-rw-r--r-- examples/tep_termination/main.h | 129
-rw-r--r-- examples/tep_termination/vxlan.c | 259
-rw-r--r-- examples/tep_termination/vxlan.h | 86
-rw-r--r-- examples/tep_termination/vxlan_setup.c | 457
-rw-r--r-- examples/tep_termination/vxlan_setup.h | 87
-rw-r--r-- examples/timer/Makefile | 56
-rw-r--r-- examples/timer/main.c | 151
-rw-r--r-- examples/vhost/Makefile | 59
-rw-r--r-- examples/vhost/main.c | 3157
-rw-r--r-- examples/vhost/main.h | 115
-rw-r--r-- examples/vhost_xen/Makefile | 52
-rw-r--r-- examples/vhost_xen/main.c | 1530
-rw-r--r-- examples/vhost_xen/main.h | 77
-rw-r--r-- examples/vhost_xen/vhost_monitor.c | 595
-rw-r--r-- examples/vhost_xen/virtio-net.h | 113
-rw-r--r-- examples/vhost_xen/xen_vhost.h | 148
-rw-r--r-- examples/vhost_xen/xenstore_parse.c | 775
-rw-r--r-- examples/vm_power_manager/Makefile | 65
-rw-r--r-- examples/vm_power_manager/channel_manager.c | 805
-rw-r--r-- examples/vm_power_manager/channel_manager.h | 320
-rw-r--r-- examples/vm_power_manager/channel_monitor.c | 233
-rw-r--r-- examples/vm_power_manager/channel_monitor.h | 102
-rw-r--r-- examples/vm_power_manager/guest_cli/Makefile | 56
-rw-r--r-- examples/vm_power_manager/guest_cli/main.c | 86
-rw-r--r-- examples/vm_power_manager/guest_cli/vm_power_cli_guest.c | 155
-rw-r--r-- examples/vm_power_manager/guest_cli/vm_power_cli_guest.h | 55
-rw-r--r-- examples/vm_power_manager/main.c | 115
-rw-r--r-- examples/vm_power_manager/power_manager.c | 252
-rw-r--r-- examples/vm_power_manager/power_manager.h | 188
-rw-r--r-- examples/vm_power_manager/vm_power_cli.c | 672
-rw-r--r-- examples/vm_power_manager/vm_power_cli.h | 47
-rw-r--r-- examples/vmdq/Makefile | 51
-rw-r--r-- examples/vmdq/main.c | 641
-rw-r--r-- examples/vmdq_dcb/Makefile | 59
-rw-r--r-- examples/vmdq_dcb/main.c | 705
283 files changed, 100675 insertions, 0 deletions
diff --git a/examples/Makefile b/examples/Makefile
new file mode 100644
index 00000000..b28b30e7
--- /dev/null
+++ b/examples/Makefile
@@ -0,0 +1,98 @@
+# BSD LICENSE
+#
+# Copyright(c) 2014 6WIND S.A.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of 6WIND S.A. nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_LIBRTE_PMD_BOND) += bond
+DIRS-y += cmdline
+DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += distributor
+ifneq ($(ICP_ROOT),)
+DIRS-y += dpdk_qat
+endif
+DIRS-y += ethtool
+DIRS-y += exception_path
+DIRS-y += helloworld
+DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += ip_pipeline
+ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
+DIRS-$(CONFIG_RTE_IP_FRAG) += ip_reassembly
+DIRS-$(CONFIG_RTE_IP_FRAG) += ip_fragmentation
+endif
+ifeq ($(CONFIG_RTE_LIBRTE_ACL)$(CONFIG_RTE_LIBRTE_HASH)$(CONFIG_RTE_LIBRTE_LPM),yyy)
+DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += ipsec-secgw
+endif
+DIRS-y += ipv4_multicast
+DIRS-$(CONFIG_RTE_LIBRTE_KNI) += kni
+DIRS-y += l2fwd
+ifneq ($(PQOS_INSTALL_PATH),)
+DIRS-y += l2fwd-cat
+endif
+DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += l2fwd-crypto
+DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += l2fwd-ivshmem
+DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += l2fwd-jobstats
+DIRS-y += l2fwd-keepalive
+DIRS-$(CONFIG_RTE_LIBRTE_LPM) += l3fwd
+DIRS-$(CONFIG_RTE_LIBRTE_ACL) += l3fwd-acl
+ifeq ($(CONFIG_RTE_LIBRTE_LPM),y)
+DIRS-$(CONFIG_RTE_LIBRTE_POWER) += l3fwd-power
+DIRS-y += l3fwd-vf
+endif
+DIRS-y += link_status_interrupt
+DIRS-$(CONFIG_RTE_LIBRTE_LPM) += load_balancer
+DIRS-y += multi_process
+DIRS-y += netmap_compat/bridge
+DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += packet_ordering
+DIRS-$(CONFIG_RTE_LIBRTE_IEEE1588) += ptpclient
+DIRS-$(CONFIG_RTE_LIBRTE_METER) += qos_meter
+DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += qos_sched
+DIRS-y += quota_watermark
+DIRS-$(CONFIG_RTE_ETHDEV_RXTX_CALLBACKS) += rxtx_callbacks
+DIRS-y += skeleton
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += tep_termination
+DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += timer
+DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost
+DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += vhost_xen
+DIRS-y += vmdq
+DIRS-y += vmdq_dcb
+ifeq ($(CONFIG_RTE_LIBRTE_POWER), y)
+ifeq ($(shell pkg-config --atleast-version=0.9.3 libvirt; echo $$?), 0)
+DIRS-y += vm_power_manager
+else
+$(info vm_power_manager requires libvirt >= 0.9.3)
+endif
+endif
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
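
For orientation, a typical way to build this examples tree with the legacy make system it uses (a sketch only: the SDK path is a placeholder, the target shown is simply the Makefile's default, and the SDK is assumed to have already been built for that target):

    export RTE_SDK=/path/to/dpdk                    # root of the DPDK tree; the Makefile errors out if unset
    export RTE_TARGET=x86_64-native-linuxapp-gcc    # default used when not overridden
    make -C $RTE_SDK/examples                       # builds every example enabled by the DIRS-y logic above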
diff --git a/examples/bond/Makefile b/examples/bond/Makefile
new file mode 100644
index 00000000..626d79d9
--- /dev/null
+++ b/examples/bond/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = bond_app
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+CFLAGS += -O3
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/bond/main.c b/examples/bond/main.c
new file mode 100644
index 00000000..53bd0441
--- /dev/null
+++ b/examples/bond/main.c
@@ -0,0 +1,786 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <termios.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_arp.h>
+#include <rte_spinlock.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "main.h"
+
+#include <rte_devargs.h>
+
+
+#include "rte_byteorder.h"
+#include "rte_cpuflags.h"
+#include "rte_eth_bond.h"
+
+#define RTE_LOGTYPE_DCB RTE_LOGTYPE_USER1
+
+#define NB_MBUF (1024*8)
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+#define BURST_RX_INTERVAL_NS (10) /* RX poll interval ~100ns */
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
+#define RX_FTHRESH (MAX_PKT_BURST * 2)/**< Default values of RX free threshold reg. */
+
+/*
+ * These default values are optimized for use with the Intel(R) 82599 10 GbE
+ * Controller and the DPDK ixgbe PMD. Consider using other values for other
+ * network controllers and/or network drivers.
+ */
+#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_RX_DESC_DEFAULT 128
+#define RTE_TX_DESC_DEFAULT 512
+
+#define BOND_IP_1 7
+#define BOND_IP_2 0
+#define BOND_IP_3 0
+#define BOND_IP_4 10
+
+/* not defined under linux */
+#ifndef NIPQUAD
+#define NIPQUAD_FMT "%u.%u.%u.%u"
+#endif
+
+#define MAX_PORTS 4
+#define PRINT_MAC(addr) printf("%02"PRIx8":%02"PRIx8":%02"PRIx8 \
+ ":%02"PRIx8":%02"PRIx8":%02"PRIx8, \
+ addr.addr_bytes[0], addr.addr_bytes[1], addr.addr_bytes[2], \
+ addr.addr_bytes[3], addr.addr_bytes[4], addr.addr_bytes[5])
+
+uint8_t slaves[RTE_MAX_ETHPORTS];
+uint8_t slaves_count;
+
+static uint8_t BOND_PORT = 0xff;
+
+static struct rte_mempool *mbuf_pool;
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_NONE,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static void
+slave_port_init(uint8_t portid, struct rte_mempool *mbuf_pool)
+{
+ int retval;
+
+ if (portid >= rte_eth_dev_count())
+ rte_exit(EXIT_FAILURE, "Invalid port\n");
+
+ retval = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (retval != 0)
+ rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n",
+ portid, retval);
+
+ /* RX setup */
+ retval = rte_eth_rx_queue_setup(portid, 0, RTE_RX_DESC_DEFAULT,
+ rte_eth_dev_socket_id(portid), NULL,
+ mbuf_pool);
+ if (retval < 0)
+ rte_exit(retval, " port %u: RX queue 0 setup failed (res=%d)",
+ portid, retval);
+
+ /* TX setup */
+ retval = rte_eth_tx_queue_setup(portid, 0, RTE_TX_DESC_DEFAULT,
+ rte_eth_dev_socket_id(portid), NULL);
+
+ if (retval < 0)
+ rte_exit(retval, "port %u: TX queue 0 setup failed (res=%d)",
+ portid, retval);
+
+ retval = rte_eth_dev_start(portid);
+ if (retval < 0)
+ rte_exit(retval,
+ "Start port %d failed (res=%d)",
+ portid, retval);
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(portid, &addr);
+ printf("Port %u MAC: ", (unsigned)portid);
+ PRINT_MAC(addr);
+ printf("\n");
+}
+
+static void
+bond_port_init(struct rte_mempool *mbuf_pool)
+{
+ int retval;
+ uint8_t i;
+
+ retval = rte_eth_bond_create("bond0", BONDING_MODE_ALB,
+ 0 /*SOCKET_ID_ANY*/);
+ if (retval < 0)
+ rte_exit(EXIT_FAILURE,
+ "Faled to create bond port\n");
+
+ BOND_PORT = (uint8_t)retval;
+
+ retval = rte_eth_dev_configure(BOND_PORT, 1, 1, &port_conf);
+ if (retval != 0)
+ rte_exit(EXIT_FAILURE, "port %u: configuration failed (res=%d)\n",
+ BOND_PORT, retval);
+
+ /* RX setup */
+ retval = rte_eth_rx_queue_setup(BOND_PORT, 0, RTE_RX_DESC_DEFAULT,
+ rte_eth_dev_socket_id(BOND_PORT), NULL,
+ mbuf_pool);
+ if (retval < 0)
+ rte_exit(retval, " port %u: RX queue 0 setup failed (res=%d)",
+ BOND_PORT, retval);
+
+ /* TX setup */
+ retval = rte_eth_tx_queue_setup(BOND_PORT, 0, RTE_TX_DESC_DEFAULT,
+ rte_eth_dev_socket_id(BOND_PORT), NULL);
+
+ if (retval < 0)
+ rte_exit(retval, "port %u: TX queue 0 setup failed (res=%d)",
+ BOND_PORT, retval);
+
+ for (i = 0; i < slaves_count; i++) {
+ if (rte_eth_bond_slave_add(BOND_PORT, slaves[i]) == -1)
+ rte_exit(-1, "Oooops! adding slave (%u) to bond (%u) failed!\n",
+ slaves[i], BOND_PORT);
+
+ }
+
+ retval = rte_eth_dev_start(BOND_PORT);
+ if (retval < 0)
+ rte_exit(retval, "Start port %d failed (res=%d)", BOND_PORT, retval);
+
+ rte_eth_promiscuous_enable(BOND_PORT);
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(BOND_PORT, &addr);
+ printf("Port %u MAC: ", (unsigned)BOND_PORT);
+ PRINT_MAC(addr);
+ printf("\n");
+}
+
+static inline size_t
+get_vlan_offset(struct ether_hdr *eth_hdr, uint16_t *proto)
+{
+ size_t vlan_offset = 0;
+
+ if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+ struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+
+ vlan_offset = sizeof(struct vlan_hdr);
+ *proto = vlan_hdr->eth_proto;
+
+ if (rte_cpu_to_be_16(ETHER_TYPE_VLAN) == *proto) {
+ vlan_hdr = vlan_hdr + 1;
+
+ *proto = vlan_hdr->eth_proto;
+ vlan_offset += sizeof(struct vlan_hdr);
+ }
+ }
+ return vlan_offset;
+}
+
+struct global_flag_stru_t {
+ int LcoreMainIsRunning;
+ int LcoreMainCore;
+ uint32_t port_packets[4];
+ rte_spinlock_t lock;
+};
+struct global_flag_stru_t global_flag_stru;
+struct global_flag_stru_t *global_flag_stru_p = &global_flag_stru;
+
+/*
+ * Main thread that does the work, reading from INPUT_PORT
+ * and writing to OUTPUT_PORT
+ */
+static int lcore_main(__attribute__((unused)) void *arg1)
+{
+ struct rte_mbuf *pkts[MAX_PKT_BURST] __rte_cache_aligned;
+ struct ether_addr d_addr;
+
+ struct ether_hdr *eth_hdr;
+ struct arp_hdr *arp_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ uint16_t ether_type, offset;
+
+ uint16_t rx_cnt;
+ uint32_t bond_ip;
+ int i = 0;
+ uint8_t is_free;
+
+ bond_ip = BOND_IP_1 | (BOND_IP_2 << 8) |
+ (BOND_IP_3 << 16) | (BOND_IP_4 << 24);
+
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+
+ while (global_flag_stru_p->LcoreMainIsRunning) {
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ rx_cnt = rte_eth_rx_burst(BOND_PORT, 0, pkts, MAX_PKT_BURST);
+ is_free = 0;
+
+ /* If didn't receive any packets, wait and go to next iteration */
+ if (rx_cnt == 0) {
+ rte_delay_us(50);
+ continue;
+ }
+
+ /* Search incoming data for ARP packets and prepare response */
+ for (i = 0; i < rx_cnt; i++) {
+ if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) {
+ global_flag_stru_p->port_packets[0]++;
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ }
+ eth_hdr = rte_pktmbuf_mtod(pkts[i], struct ether_hdr *);
+ ether_type = eth_hdr->ether_type;
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))
+ printf("VLAN taged frame, offset:");
+ offset = get_vlan_offset(eth_hdr, &ether_type);
+ if (offset > 0)
+ printf("%d\n", offset);
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_ARP)) {
+ if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) {
+ global_flag_stru_p->port_packets[1]++;
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ }
+ arp_hdr = (struct arp_hdr *)((char *)(eth_hdr + 1) + offset);
+ if (arp_hdr->arp_data.arp_tip == bond_ip) {
+ if (arp_hdr->arp_op == rte_cpu_to_be_16(ARP_OP_REQUEST)) {
+ arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REPLY);
+ /* Switch src and dst data and set bonding MAC */
+ ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr);
+ rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr);
+ ether_addr_copy(&arp_hdr->arp_data.arp_sha, &arp_hdr->arp_data.arp_tha);
+ arp_hdr->arp_data.arp_tip = arp_hdr->arp_data.arp_sip;
+ rte_eth_macaddr_get(BOND_PORT, &d_addr);
+ ether_addr_copy(&d_addr, &arp_hdr->arp_data.arp_sha);
+ arp_hdr->arp_data.arp_sip = bond_ip;
+ rte_eth_tx_burst(BOND_PORT, 0, &pkts[i], 1);
+ is_free = 1;
+ } else {
+ rte_eth_tx_burst(BOND_PORT, 0, NULL, 0);
+ }
+ }
+ } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+ if (rte_spinlock_trylock(&global_flag_stru_p->lock) == 1) {
+ global_flag_stru_p->port_packets[2]++;
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ }
+ ipv4_hdr = (struct ipv4_hdr *)((char *)(eth_hdr + 1) + offset);
+ if (ipv4_hdr->dst_addr == bond_ip) {
+ ether_addr_copy(&eth_hdr->s_addr, &eth_hdr->d_addr);
+ rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr);
+ ipv4_hdr->dst_addr = ipv4_hdr->src_addr;
+ ipv4_hdr->src_addr = bond_ip;
+ rte_eth_tx_burst(BOND_PORT, 0, &pkts[i], 1);
+ }
+
+ }
+
+ /* Free processed packets */
+ if (is_free == 0)
+ rte_pktmbuf_free(pkts[i]);
+ }
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ }
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ printf("BYE lcore_main\n");
+ return 0;
+}
+
+struct cmd_obj_send_result {
+ cmdline_fixed_string_t action;
+ cmdline_ipaddr_t ip;
+};
+static inline void get_string(struct cmd_obj_send_result *res, char *buf, uint8_t size)
+{
+ snprintf(buf, size, NIPQUAD_FMT,
+ ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[0]),
+ ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[1]),
+ ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[2]),
+ ((unsigned)((unsigned char *)&(res->ip.addr.ipv4))[3])
+ );
+}
+static void cmd_obj_send_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+
+ struct cmd_obj_send_result *res = parsed_result;
+ char ip_str[INET6_ADDRSTRLEN];
+
+ struct rte_mbuf *created_pkt;
+ struct ether_hdr *eth_hdr;
+ struct arp_hdr *arp_hdr;
+
+ uint32_t bond_ip;
+ size_t pkt_size;
+
+ if (res->ip.family == AF_INET)
+ get_string(res, ip_str, INET_ADDRSTRLEN);
+ else
+ cmdline_printf(cl, "Wrong IP format. Only IPv4 is supported\n");
+
+ bond_ip = BOND_IP_1 | (BOND_IP_2 << 8) |
+ (BOND_IP_3 << 16) | (BOND_IP_4 << 24);
+
+ created_pkt = rte_pktmbuf_alloc(mbuf_pool);
+ pkt_size = sizeof(struct ether_hdr) + sizeof(struct arp_hdr);
+ created_pkt->data_len = pkt_size;
+ created_pkt->pkt_len = pkt_size;
+
+ eth_hdr = rte_pktmbuf_mtod(created_pkt, struct ether_hdr *);
+ rte_eth_macaddr_get(BOND_PORT, &eth_hdr->s_addr);
+ memset(&eth_hdr->d_addr, 0xFF, ETHER_ADDR_LEN);
+ eth_hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_ARP);
+
+ arp_hdr = (struct arp_hdr *)((char *)eth_hdr + sizeof(struct ether_hdr));
+ arp_hdr->arp_hrd = rte_cpu_to_be_16(ARP_HRD_ETHER);
+ arp_hdr->arp_pro = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ arp_hdr->arp_hln = ETHER_ADDR_LEN;
+ arp_hdr->arp_pln = sizeof(uint32_t);
+ arp_hdr->arp_op = rte_cpu_to_be_16(ARP_OP_REQUEST);
+
+ rte_eth_macaddr_get(BOND_PORT, &arp_hdr->arp_data.arp_sha);
+ arp_hdr->arp_data.arp_sip = bond_ip;
+ memset(&arp_hdr->arp_data.arp_tha, 0, ETHER_ADDR_LEN);
+ arp_hdr->arp_data.arp_tip =
+ ((unsigned char *)&res->ip.addr.ipv4)[0] |
+ (((unsigned char *)&res->ip.addr.ipv4)[1] << 8) |
+ (((unsigned char *)&res->ip.addr.ipv4)[2] << 16) |
+ (((unsigned char *)&res->ip.addr.ipv4)[3] << 24);
+ rte_eth_tx_burst(BOND_PORT, 0, &created_pkt, 1);
+
+ rte_delay_ms(100);
+ cmdline_printf(cl, "\n");
+}
+
+cmdline_parse_token_string_t cmd_obj_action_send =
+ TOKEN_STRING_INITIALIZER(struct cmd_obj_send_result, action, "send");
+cmdline_parse_token_ipaddr_t cmd_obj_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_obj_send_result, ip);
+
+cmdline_parse_inst_t cmd_obj_send = {
+ .f = cmd_obj_send_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "send client_ip",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_obj_action_send,
+ (void *)&cmd_obj_ip,
+ NULL,
+ },
+};
+
+struct cmd_start_result {
+ cmdline_fixed_string_t start;
+};
+
+static void cmd_start_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ int slave_core_id = rte_lcore_id();
+
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ if (global_flag_stru_p->LcoreMainIsRunning == 0) {
+ if (lcore_config[global_flag_stru_p->LcoreMainCore].state != WAIT) {
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ return;
+ }
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ } else {
+ cmdline_printf(cl, "lcore_main already running on core:%d\n",
+ global_flag_stru_p->LcoreMainCore);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ return;
+ }
+
+ /* start lcore main on core != master_core - ARP response thread */
+ slave_core_id = rte_get_next_lcore(rte_lcore_id(), 1, 0);
+ if ((slave_core_id >= RTE_MAX_LCORE) || (slave_core_id == 0))
+ return;
+
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ global_flag_stru_p->LcoreMainIsRunning = 1;
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ cmdline_printf(cl,
+ "Starting lcore_main on core %d:%d "
+ "Our IP:%d.%d.%d.%d\n",
+ slave_core_id,
+ rte_eal_remote_launch(lcore_main, NULL, slave_core_id),
+ BOND_IP_1,
+ BOND_IP_2,
+ BOND_IP_3,
+ BOND_IP_4
+ );
+}
+
+cmdline_parse_token_string_t cmd_start_start =
+ TOKEN_STRING_INITIALIZER(struct cmd_start_result, start, "start");
+
+cmdline_parse_inst_t cmd_start = {
+ .f = cmd_start_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "starts listening if not started at startup",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_start_start,
+ NULL,
+ },
+};
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl,
+ "ALB - link bonding mode 6 example\n"
+ "send IP - sends one ARPrequest thru bonding for IP.\n"
+ "start - starts listening ARPs.\n"
+ "stop - stops lcore_main.\n"
+ "show - shows some bond info: ex. active slaves etc.\n"
+ "help - prints help.\n"
+ "quit - terminate all threads and quit.\n"
+ );
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "show help",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+struct cmd_stop_result {
+ cmdline_fixed_string_t stop;
+};
+
+static void cmd_stop_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ if (global_flag_stru_p->LcoreMainIsRunning == 0) {
+ cmdline_printf(cl,
+ "lcore_main not running on core:%d\n",
+ global_flag_stru_p->LcoreMainCore);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ return;
+ }
+ global_flag_stru_p->LcoreMainIsRunning = 0;
+ rte_eal_wait_lcore(global_flag_stru_p->LcoreMainCore);
+ cmdline_printf(cl,
+ "lcore_main stopped on core:%d\n",
+ global_flag_stru_p->LcoreMainCore);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+}
+
+cmdline_parse_token_string_t cmd_stop_stop =
+ TOKEN_STRING_INITIALIZER(struct cmd_stop_result, stop, "stop");
+
+cmdline_parse_inst_t cmd_stop = {
+ .f = cmd_stop_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "this command do not handle any arguments",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_stop_stop,
+ NULL,
+ },
+};
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ if (global_flag_stru_p->LcoreMainIsRunning == 0) {
+ cmdline_printf(cl,
+ "lcore_main not running on core:%d\n",
+ global_flag_stru_p->LcoreMainCore);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ cmdline_quit(cl);
+ return;
+ }
+ global_flag_stru_p->LcoreMainIsRunning = 0;
+ rte_eal_wait_lcore(global_flag_stru_p->LcoreMainCore);
+ cmdline_printf(cl,
+ "lcore_main stopped on core:%d\n",
+ global_flag_stru_p->LcoreMainCore);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "this command do not handle any arguments",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+struct cmd_show_result {
+ cmdline_fixed_string_t show;
+};
+
+static void cmd_show_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ uint8_t slaves[16] = {0};
+ uint8_t len = 16;
+ struct ether_addr addr;
+ uint8_t i = 0;
+
+ while (i < slaves_count) {
+ rte_eth_macaddr_get(i, &addr);
+ PRINT_MAC(addr);
+ printf("\n");
+ i++;
+ }
+
+ rte_spinlock_trylock(&global_flag_stru_p->lock);
+ cmdline_printf(cl,
+ "Active_slaves:%d "
+ "packets received:Tot:%d Arp:%d IPv4:%d\n",
+ rte_eth_bond_active_slaves_get(BOND_PORT, slaves, len),
+ global_flag_stru_p->port_packets[0],
+ global_flag_stru_p->port_packets[1],
+ global_flag_stru_p->port_packets[2]);
+ rte_spinlock_unlock(&global_flag_stru_p->lock);
+}
+
+cmdline_parse_token_string_t cmd_show_show =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_result, show, "show");
+
+cmdline_parse_inst_t cmd_show = {
+ .f = cmd_show_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "this command do not handle any arguments",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_show_show,
+ NULL,
+ },
+};
+
+/****** CONTEXT (list of instruction) */
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_start,
+ (cmdline_parse_inst_t *)&cmd_obj_send,
+ (cmdline_parse_inst_t *)&cmd_stop,
+ (cmdline_parse_inst_t *)&cmd_show,
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_help,
+ NULL,
+};
+
+/* prompt function, called from main on MASTER lcore */
+static void prompt(__attribute__((unused)) void *arg1)
+{
+ struct cmdline *cl;
+
+ cl = cmdline_stdin_new(main_ctx, "bond6>");
+ if (cl != NULL) {
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+ }
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ int ret;
+ uint8_t nb_ports, i;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ rte_eal_devargs_dump(stdout);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "Give at least one port\n");
+ else if (nb_ports > MAX_PORTS)
+ rte_exit(EXIT_FAILURE, "You can have max 4 ports\n");
+
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ slaves_count = nb_ports;
+ for (i = 0; i < nb_ports; i++) {
+ slave_port_init(i, mbuf_pool);
+ slaves[i] = i;
+ }
+
+ bond_port_init(mbuf_pool);
+
+ rte_spinlock_init(&global_flag_stru_p->lock);
+ int slave_core_id = rte_lcore_id();
+
+ /* check state of lcores */
+ RTE_LCORE_FOREACH_SLAVE(slave_core_id) {
+ if (lcore_config[slave_core_id].state != WAIT)
+ return -EBUSY;
+ }
+ /* start lcore main on core != master_core - ARP response thread */
+ slave_core_id = rte_get_next_lcore(rte_lcore_id(), 1, 0);
+ if ((slave_core_id >= RTE_MAX_LCORE) || (slave_core_id == 0))
+ return -EPERM;
+
+ global_flag_stru_p->LcoreMainIsRunning = 1;
+ global_flag_stru_p->LcoreMainCore = slave_core_id;
+ printf("Starting lcore_main on core %d:%d Our IP:%d.%d.%d.%d\n",
+ slave_core_id,
+ rte_eal_remote_launch((lcore_function_t *)lcore_main,
+ NULL,
+ slave_core_id),
+ BOND_IP_1,
+ BOND_IP_2,
+ BOND_IP_3,
+ BOND_IP_4
+ );
+
+ /* Start prompt for user interact */
+ prompt(NULL);
+
+ rte_delay_ms(100);
+ return 0;
+}
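
A rough usage sketch for this bonding (mode 6 / ALB) example: the binary name comes from the Makefile above and the command names from the main_ctx table, while the EAL arguments, build path, and client IP are illustrative only.

    ./build/bond_app -c 0xf -n 4     # expects between one and four slave ports bound to DPDK
    bond6> show                      # active slaves and per-type packet counters
    bond6> send 7.0.0.5              # send one ARP request through the bonded port (example IP)
    bond6> stop                      # stop lcore_main
    bond6> start                     # restart lcore_main (the ARP responder) on a slave lcore
    bond6> quit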
diff --git a/examples/bond/main.h b/examples/bond/main.h
new file mode 100644
index 00000000..ea331e56
--- /dev/null
+++ b/examples/bond/main.h
@@ -0,0 +1,39 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+int main(int argc, char *argv[]);
+
+#endif /* ifndef _MAIN_H_ */
diff --git a/examples/cmdline/Makefile b/examples/cmdline/Makefile
new file mode 100644
index 00000000..9ebe4355
--- /dev/null
+++ b/examples/cmdline/Makefile
@@ -0,0 +1,51 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = cmdline
+
+# all source are stored in SRCS-y
+SRCS-y := main.c commands.c parse_obj_list.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS_parse_obj_list.o := -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/cmdline/commands.c b/examples/cmdline/commands.c
new file mode 100644
index 00000000..f3ba2476
--- /dev/null
+++ b/examples/cmdline/commands.c
@@ -0,0 +1,283 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of California, Berkeley nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <termios.h>
+#ifndef __linux__
+ #ifdef __FreeBSD__
+ #include <sys/socket.h>
+ #else
+ #include <net/socket.h>
+ #endif
+#endif
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+
+#include <rte_string_fns.h>
+
+#include "parse_obj_list.h"
+
+struct object_list global_obj_list;
+
+/* not defined under linux */
+#ifndef NIPQUAD
+#define NIPQUAD_FMT "%u.%u.%u.%u"
+#define NIPQUAD(addr) \
+ (unsigned)((unsigned char *)&addr)[0], \
+ (unsigned)((unsigned char *)&addr)[1], \
+ (unsigned)((unsigned char *)&addr)[2], \
+ (unsigned)((unsigned char *)&addr)[3]
+#endif
+
+#ifndef NIP6
+#define NIP6_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+#define NIP6(addr) \
+ (unsigned)((addr).s6_addr[0]), \
+ (unsigned)((addr).s6_addr[1]), \
+ (unsigned)((addr).s6_addr[2]), \
+ (unsigned)((addr).s6_addr[3]), \
+ (unsigned)((addr).s6_addr[4]), \
+ (unsigned)((addr).s6_addr[5]), \
+ (unsigned)((addr).s6_addr[6]), \
+ (unsigned)((addr).s6_addr[7]), \
+ (unsigned)((addr).s6_addr[8]), \
+ (unsigned)((addr).s6_addr[9]), \
+ (unsigned)((addr).s6_addr[10]), \
+ (unsigned)((addr).s6_addr[11]), \
+ (unsigned)((addr).s6_addr[12]), \
+ (unsigned)((addr).s6_addr[13]), \
+ (unsigned)((addr).s6_addr[14]), \
+ (unsigned)((addr).s6_addr[15])
+#endif
+
+
+/**********************************************************/
+
+struct cmd_obj_del_show_result {
+ cmdline_fixed_string_t action;
+ struct object *obj;
+};
+
+static void cmd_obj_del_show_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_obj_del_show_result *res = parsed_result;
+ char ip_str[INET6_ADDRSTRLEN];
+
+ if (res->obj->ip.family == AF_INET)
+ snprintf(ip_str, sizeof(ip_str), NIPQUAD_FMT,
+ NIPQUAD(res->obj->ip.addr.ipv4));
+ else
+ snprintf(ip_str, sizeof(ip_str), NIP6_FMT,
+ NIP6(res->obj->ip.addr.ipv6));
+
+ if (strcmp(res->action, "del") == 0) {
+ SLIST_REMOVE(&global_obj_list, res->obj, object, next);
+ cmdline_printf(cl, "Object %s removed, ip=%s\n",
+ res->obj->name, ip_str);
+ free(res->obj);
+ }
+ else if (strcmp(res->action, "show") == 0) {
+ cmdline_printf(cl, "Object %s, ip=%s\n",
+ res->obj->name, ip_str);
+ }
+}
+
+cmdline_parse_token_string_t cmd_obj_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_obj_del_show_result,
+ action, "show#del");
+parse_token_obj_list_t cmd_obj_obj =
+ TOKEN_OBJ_LIST_INITIALIZER(struct cmd_obj_del_show_result, obj,
+ &global_obj_list);
+
+cmdline_parse_inst_t cmd_obj_del_show = {
+ .f = cmd_obj_del_show_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "Show/del an object",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_obj_action,
+ (void *)&cmd_obj_obj,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_obj_add_result {
+ cmdline_fixed_string_t action;
+ cmdline_fixed_string_t name;
+ cmdline_ipaddr_t ip;
+};
+
+static void cmd_obj_add_parsed(void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_obj_add_result *res = parsed_result;
+ struct object *o;
+ char ip_str[INET6_ADDRSTRLEN];
+
+ SLIST_FOREACH(o, &global_obj_list, next) {
+ if (!strcmp(res->name, o->name)) {
+ cmdline_printf(cl, "Object %s already exist\n", res->name);
+ return;
+ }
+ }
+
+ o = malloc(sizeof(*o));
+ if (!o) {
+ cmdline_printf(cl, "mem error\n");
+ return;
+ }
+ snprintf(o->name, sizeof(o->name), "%s", res->name);
+ o->ip = res->ip;
+ SLIST_INSERT_HEAD(&global_obj_list, o, next);
+
+ if (o->ip.family == AF_INET)
+ snprintf(ip_str, sizeof(ip_str), NIPQUAD_FMT,
+ NIPQUAD(o->ip.addr.ipv4));
+ else
+ snprintf(ip_str, sizeof(ip_str), NIP6_FMT,
+ NIP6(o->ip.addr.ipv6));
+
+ cmdline_printf(cl, "Object %s added, ip=%s\n",
+ o->name, ip_str);
+}
+
+cmdline_parse_token_string_t cmd_obj_action_add =
+ TOKEN_STRING_INITIALIZER(struct cmd_obj_add_result, action, "add");
+cmdline_parse_token_string_t cmd_obj_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_obj_add_result, name, NULL);
+cmdline_parse_token_ipaddr_t cmd_obj_ip =
+ TOKEN_IPADDR_INITIALIZER(struct cmd_obj_add_result, ip);
+
+cmdline_parse_inst_t cmd_obj_add = {
+ .f = cmd_obj_add_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "Add an object (name, val)",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_obj_action_add,
+ (void *)&cmd_obj_name,
+ (void *)&cmd_obj_ip,
+ NULL,
+ },
+};
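+
+/*
+ * Example interaction (a sketch of the expected behaviour, based on the
+ * printf strings above, not captured output):
+ *
+ *   example> add obj0 10.0.0.1
+ *   Object obj0 added, ip=10.0.0.1
+ *   example> show obj0
+ *   Object obj0, ip=10.0.0.1
+ *   example> del obj0
+ *   Object obj0 removed, ip=10.0.0.1
+ */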
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl,
+ "Demo example of command line interface in RTE\n\n"
+ "This is a readline-like interface that can be used to\n"
+ "debug your RTE application. It supports some features\n"
+ "of GNU readline like completion, cut/paste, and some\n"
+ "other special bindings.\n\n"
+ "This demo shows how rte_cmdline library can be\n"
+ "extended to handle a list of objects. There are\n"
+ "3 commands:\n"
+ "- add obj_name IP\n"
+ "- del obj_name\n"
+ "- show obj_name\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "show help",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+
+/**********************************************************/
+/**********************************************************/
+/****** CONTEXT (list of instructions) */
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_obj_del_show,
+ (cmdline_parse_inst_t *)&cmd_obj_add,
+ (cmdline_parse_inst_t *)&cmd_help,
+ NULL,
+};
diff --git a/examples/cmdline/commands.h b/examples/cmdline/commands.h
new file mode 100644
index 00000000..712894b1
--- /dev/null
+++ b/examples/cmdline/commands.h
@@ -0,0 +1,39 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _COMMANDS_H_
+#define _COMMANDS_H_
+
+extern cmdline_parse_ctx_t main_ctx[];
+
+#endif /* _COMMANDS_H_ */
diff --git a/examples/cmdline/main.c b/examples/cmdline/main.c
new file mode 100644
index 00000000..c966df03
--- /dev/null
+++ b/examples/cmdline/main.c
@@ -0,0 +1,96 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of California, Berkeley nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <termios.h>
+#include <sys/queue.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_debug.h>
+
+#include "commands.h"
+
+int main(int argc, char **argv)
+{
+ int ret;
+ struct cmdline *cl;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ cl = cmdline_stdin_new(main_ctx, "example> ");
+ if (cl == NULL)
+ rte_panic("Cannot create cmdline instance\n");
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ return 0;
+}
diff --git a/examples/cmdline/parse_obj_list.c b/examples/cmdline/parse_obj_list.c
new file mode 100644
index 00000000..cdbaf2fe
--- /dev/null
+++ b/examples/cmdline/parse_obj_list.c
@@ -0,0 +1,166 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of California, Berkeley nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <ctype.h>
+#include <string.h>
+#include <netinet/in.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_ipaddr.h>
+
+#include <rte_string_fns.h>
+
+#include "parse_obj_list.h"
+
+/* This file is an example of extending libcmdline: it shows how objects
+ * stored in a list can be parsed and tab-completed as command tokens. */
+
+struct cmdline_token_ops token_obj_list_ops = {
+ .parse = parse_obj_list,
+ .complete_get_nb = complete_get_nb_obj_list,
+ .complete_get_elt = complete_get_elt_obj_list,
+ .get_help = get_help_obj_list,
+};
+
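+/*
+ * libcmdline invokes these callbacks whenever it meets a token of this
+ * type: parse_obj_list() matches the input against the object names in
+ * the attached list, and the complete_* callbacks drive tab-completion
+ * over the same list.
+ */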
+int
+parse_obj_list(cmdline_parse_token_hdr_t *tk, const char *buf, void *res,
+ unsigned ressize)
+{
+ struct token_obj_list *tk2 = (struct token_obj_list *)tk;
+ struct token_obj_list_data *tkd = &tk2->obj_list_data;
+ struct object *o;
+ unsigned int token_len = 0;
+
+ if (*buf == 0)
+ return -1;
+
+ if (res && ressize < sizeof(struct object *))
+ return -1;
+
+ while(!cmdline_isendoftoken(buf[token_len]))
+ token_len++;
+
+ SLIST_FOREACH(o, tkd->list, next) {
+ if (token_len != strnlen(o->name, OBJ_NAME_LEN_MAX))
+ continue;
+ if (strncmp(buf, o->name, token_len))
+ continue;
+ break;
+ }
+ if (!o) /* not found */
+ return -1;
+
+ /* store the address of object in structure */
+ if (res)
+ *(struct object **)res = o;
+
+ return token_len;
+}
+
+int complete_get_nb_obj_list(cmdline_parse_token_hdr_t *tk)
+{
+ struct token_obj_list *tk2 = (struct token_obj_list *)tk;
+ struct token_obj_list_data *tkd = &tk2->obj_list_data;
+ struct object *o;
+ int ret = 0;
+
+ SLIST_FOREACH(o, tkd->list, next) {
+ ret++;
+ }
+ return ret;
+}
+
+int complete_get_elt_obj_list(cmdline_parse_token_hdr_t *tk,
+ int idx, char *dstbuf, unsigned int size)
+{
+ struct token_obj_list *tk2 = (struct token_obj_list *)tk;
+ struct token_obj_list_data *tkd = &tk2->obj_list_data;
+ struct object *o;
+ int i = 0;
+ unsigned len;
+
+ SLIST_FOREACH(o, tkd->list, next) {
+ if (i++ == idx)
+ break;
+ }
+ if (!o)
+ return -1;
+
+ len = strnlen(o->name, OBJ_NAME_LEN_MAX);
+ if ((len + 1) > size)
+ return -1;
+
+ if (dstbuf)
+ snprintf(dstbuf, size, "%s", o->name);
+
+ return 0;
+}
+
+
+int get_help_obj_list(__attribute__((unused)) cmdline_parse_token_hdr_t *tk,
+ char *dstbuf, unsigned int size)
+{
+ snprintf(dstbuf, size, "Obj-List");
+ return 0;
+}
diff --git a/examples/cmdline/parse_obj_list.h b/examples/cmdline/parse_obj_list.h
new file mode 100644
index 00000000..871c53a4
--- /dev/null
+++ b/examples/cmdline/parse_obj_list.h
@@ -0,0 +1,112 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 2009, Olivier MATZ <zer0@droids-corp.org>
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of the University of California, Berkeley nor the
+ * names of its contributors may be used to endorse or promote products
+ * derived from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PARSE_OBJ_LIST_H_
+#define _PARSE_OBJ_LIST_H_
+
+/* This file is an example of extending libcmdline: it shows how objects
+ * stored in a list can be parsed and tab-completed as command tokens. */
+
+#include <sys/queue.h>
+#include <cmdline_parse.h>
+
+#define OBJ_NAME_LEN_MAX 64
+
+struct object {
+ SLIST_ENTRY(object) next;
+ char name[OBJ_NAME_LEN_MAX];
+ cmdline_ipaddr_t ip;
+};
+
+/* define struct object_list */
+SLIST_HEAD(object_list, object);
+
+/* data is a pointer to a list */
+struct token_obj_list_data {
+ struct object_list *list;
+};
+
+struct token_obj_list {
+ struct cmdline_token_hdr hdr;
+ struct token_obj_list_data obj_list_data;
+};
+typedef struct token_obj_list parse_token_obj_list_t;
+
+extern struct cmdline_token_ops token_obj_list_ops;
+
+int parse_obj_list(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res,
+ unsigned ressize);
+int complete_get_nb_obj_list(cmdline_parse_token_hdr_t *tk);
+int complete_get_elt_obj_list(cmdline_parse_token_hdr_t *tk, int idx,
+ char *dstbuf, unsigned int size);
+int get_help_obj_list(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned int size);
+
+#define TOKEN_OBJ_LIST_INITIALIZER(structure, field, obj_list_ptr) \
+{ \
+ .hdr = { \
+ .ops = &token_obj_list_ops, \
+ .offset = offsetof(structure, field), \
+ }, \
+ .obj_list_data = { \
+ .list = obj_list_ptr, \
+ }, \
+}
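+
+/*
+ * Usage sketch (the names below are illustrative only, not part of this
+ * example application):
+ *
+ *   struct my_result {
+ *       cmdline_fixed_string_t action;
+ *       struct object *obj;
+ *   };
+ *   struct object_list my_list = SLIST_HEAD_INITIALIZER(my_list);
+ *   parse_token_obj_list_t my_tok =
+ *       TOKEN_OBJ_LIST_INITIALIZER(struct my_result, obj, &my_list);
+ *
+ * The token can then be placed in the .tokens array of a
+ * cmdline_parse_inst_t, as commands.c does with cmd_obj_obj.
+ */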
+
+#endif /* _PARSE_OBJ_LIST_H_ */
diff --git a/examples/distributor/Makefile b/examples/distributor/Makefile
new file mode 100644
index 00000000..6a5badaa
--- /dev/null
+++ b/examples/distributor/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = distributor_app
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/distributor/main.c b/examples/distributor/main.c
new file mode 100644
index 00000000..c0201a9e
--- /dev/null
+++ b/examples/distributor/main.c
@@ -0,0 +1,600 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <signal.h>
+#include <getopt.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_debug.h>
+#include <rte_prefetch.h>
+#include <rte_distributor.h>
+
+#define RX_RING_SIZE 256
+#define TX_RING_SIZE 512
+#define NUM_MBUFS ((64*1024)-1)
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+#define RTE_RING_SZ 1024
+
+/* uncomment the line below to enable debug logs */
+/* #define DEBUG */
+
+#ifdef DEBUG
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) do { \
+ RTE_LOG(DEBUG, log_type, fmt, ##args); \
+} while (0)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#endif
+
+#define RTE_LOGTYPE_DISTRAPP RTE_LOGTYPE_USER1
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+volatile uint8_t quit_signal;
+volatile uint8_t quit_signal_rx;
+
+static volatile struct app_stats {
+ struct {
+ uint64_t rx_pkts;
+ uint64_t returned_pkts;
+ uint64_t enqueued_pkts;
+ } rx __rte_cache_aligned;
+
+ struct {
+ uint64_t dequeue_pkts;
+ uint64_t tx_pkts;
+ } tx __rte_cache_aligned;
+} app_stats;
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
+ ETH_RSS_TCP | ETH_RSS_SCTP,
+ }
+ },
+};
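+
+/*
+ * RSS is enabled above so that each received mbuf carries a per-flow
+ * hash; the distributor uses the mbuf hash as a flow tag when assigning
+ * packets to workers (quit_workers() below relies on this by faking
+ * distinct tags on the shutdown path).
+ */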
+
+struct output_buffer {
+ unsigned count;
+ struct rte_mbuf *mbufs[BURST_SIZE];
+};
+
+/*
+ * Initialises a given port using global settings, with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rxRings = 1, txRings = rte_lcore_count() - 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rxRings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < txRings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct rte_eth_link link;
+ rte_eth_link_get_nowait(port, &link);
+ if (!link.link_status) {
+ sleep(1);
+ rte_eth_link_get_nowait(port, &link);
+ }
+
+ if (!link.link_status) {
+ printf("Link down on port %"PRIu8"\n", port);
+ return 0;
+ }
+
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
+
+struct lcore_params {
+ unsigned worker_id;
+ struct rte_distributor *d;
+ struct rte_ring *r;
+ struct rte_mempool *mem_pool;
+};
+
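+/*
+ * Hand one dummy mbuf per worker to the distributor, each with a
+ * distinct hash tag, so that workers blocked in
+ * rte_distributor_get_pkt() wake up and can observe the quit flag.
+ */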
+static void
+quit_workers(struct rte_distributor *d, struct rte_mempool *p)
+{
+ const unsigned num_workers = rte_lcore_count() - 2;
+ unsigned i;
+ struct rte_mbuf *bufs[num_workers];
+ if (rte_mempool_get_bulk(p, (void *)bufs, num_workers) != 0) {
+ printf("%s: error getting mbufs from pool\n", __func__);
+ return;
+ }
+
+ for (i = 0; i < num_workers; i++)
+ bufs[i]->hash.rss = i << 1;
+
+ rte_distributor_process(d, bufs, num_workers);
+ rte_mempool_put_bulk(p, (void *)bufs, num_workers);
+}
+
+static int
+lcore_rx(struct lcore_params *p)
+{
+ struct rte_distributor *d = p->d;
+ struct rte_mempool *mem_pool = p->mem_pool;
+ struct rte_ring *r = p->r;
+ const uint8_t nb_ports = rte_eth_dev_count();
+ const int socket_id = rte_socket_id();
+ uint8_t port;
+
+ for (port = 0; port < nb_ports; port++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << port)) == 0)
+ continue;
+
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) != socket_id)
+ printf("WARNING, port %u is on remote NUMA node to "
+ "RX thread.\n\tPerformance will not "
+ "be optimal.\n", port);
+ }
+
+ printf("\nCore %u doing packet RX.\n", rte_lcore_id());
+ port = 0;
+ while (!quit_signal_rx) {
+
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << port)) == 0) {
+ if (++port == nb_ports)
+ port = 0;
+ continue;
+ }
+ struct rte_mbuf *bufs[BURST_SIZE*2];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0, bufs,
+ BURST_SIZE);
+ app_stats.rx.rx_pkts += nb_rx;
+
+ rte_distributor_process(d, bufs, nb_rx);
+ const uint16_t nb_ret = rte_distributor_returned_pkts(d,
+ bufs, BURST_SIZE*2);
+ app_stats.rx.returned_pkts += nb_ret;
+ if (unlikely(nb_ret == 0))
+ continue;
+
+ uint16_t sent = rte_ring_enqueue_burst(r, (void *)bufs, nb_ret);
+ app_stats.rx.enqueued_pkts += sent;
+ if (unlikely(sent < nb_ret)) {
+ LOG_DEBUG(DISTRAPP, "%s:Packet loss due to full ring\n", __func__);
+ while (sent < nb_ret)
+ rte_pktmbuf_free(bufs[sent++]);
+ }
+ if (++port == nb_ports)
+ port = 0;
+ }
+ rte_distributor_process(d, NULL, 0);
+ /* flush the distributor to bring it to a known state */
+ rte_distributor_flush(d);
+ /* set the quit flag for the worker and tx threads */
+ quit_signal = 1;
+ /*
+ * worker threads may be blocked in rte_distributor_get_pkt() now that
+ * the distribution loop has stopped; keep feeding them packets until
+ * quit_signal has actually been observed so they can shut down
+ * gracefully
+ */
+ quit_workers(d, mem_pool);
+ /* the rx thread quits last */
+ return 0;
+}
+
+static inline void
+flush_one_port(struct output_buffer *outbuf, uint8_t outp)
+{
+ unsigned nb_tx = rte_eth_tx_burst(outp, 0, outbuf->mbufs,
+ outbuf->count);
+ app_stats.tx.tx_pkts += nb_tx;
+
+ if (unlikely(nb_tx < outbuf->count)) {
+ LOG_DEBUG(DISTRAPP, "%s:Packet loss with tx_burst\n", __func__);
+ do {
+ rte_pktmbuf_free(outbuf->mbufs[nb_tx]);
+ } while (++nb_tx < outbuf->count);
+ }
+ outbuf->count = 0;
+}
+
+static inline void
+flush_all_ports(struct output_buffer *tx_buffers, uint8_t nb_ports)
+{
+ uint8_t outp;
+ for (outp = 0; outp < nb_ports; outp++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << outp)) == 0)
+ continue;
+
+ if (tx_buffers[outp].count == 0)
+ continue;
+
+ flush_one_port(&tx_buffers[outp], outp);
+ }
+}
+
+static int
+lcore_tx(struct rte_ring *in_r)
+{
+ static struct output_buffer tx_buffers[RTE_MAX_ETHPORTS];
+ const uint8_t nb_ports = rte_eth_dev_count();
+ const int socket_id = rte_socket_id();
+ uint8_t port;
+
+ for (port = 0; port < nb_ports; port++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << port)) == 0)
+ continue;
+
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) != socket_id)
+ printf("WARNING, port %u is on remote NUMA node to "
+ "TX thread.\n\tPerformance will not "
+ "be optimal.\n", port);
+ }
+
+ printf("\nCore %u doing packet TX.\n", rte_lcore_id());
+ while (!quit_signal) {
+
+ for (port = 0; port < nb_ports; port++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << port)) == 0)
+ continue;
+
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_ring_dequeue_burst(in_r,
+ (void *)bufs, BURST_SIZE);
+ app_stats.tx.dequeue_pkts += nb_rx;
+
+ /* if we get no traffic, flush anything we have */
+ if (unlikely(nb_rx == 0)) {
+ flush_all_ports(tx_buffers, nb_ports);
+ continue;
+ }
+
+ /* for traffic we receive, queue it up for transmit */
+ uint16_t i;
+ rte_prefetch_non_temporal((void *)bufs[0]);
+ rte_prefetch_non_temporal((void *)bufs[1]);
+ rte_prefetch_non_temporal((void *)bufs[2]);
+ for (i = 0; i < nb_rx; i++) {
+ struct output_buffer *outbuf;
+ uint8_t outp;
+ rte_prefetch_non_temporal((void *)bufs[i + 3]);
+ /*
+ * workers update the mbuf port field to hold
+ * the output port value
+ */
+ outp = bufs[i]->port;
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << outp)) == 0)
+ continue;
+
+ outbuf = &tx_buffers[outp];
+ outbuf->mbufs[outbuf->count++] = bufs[i];
+ if (outbuf->count == BURST_SIZE)
+ flush_one_port(outbuf, outp);
+ }
+ }
+ }
+ return 0;
+}
+
+static void
+int_handler(int sig_num)
+{
+ printf("Exiting on signal %d\n", sig_num);
+ /* set quit flag for rx thread to exit */
+ quit_signal_rx = 1;
+}
+
+static void
+print_stats(void)
+{
+ struct rte_eth_stats eth_stats;
+ unsigned i;
+
+ printf("\nRX thread stats:\n");
+ printf(" - Received: %"PRIu64"\n", app_stats.rx.rx_pkts);
+ printf(" - Processed: %"PRIu64"\n", app_stats.rx.returned_pkts);
+ printf(" - Enqueued: %"PRIu64"\n", app_stats.rx.enqueued_pkts);
+
+ printf("\nTX thread stats:\n");
+ printf(" - Dequeued: %"PRIu64"\n", app_stats.tx.dequeue_pkts);
+ printf(" - Transmitted: %"PRIu64"\n", app_stats.tx.tx_pkts);
+
+ for (i = 0; i < rte_eth_dev_count(); i++) {
+ rte_eth_stats_get(i, &eth_stats);
+ printf("\nPort %u stats:\n", i);
+ printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets);
+ printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets);
+ printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors);
+ printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors);
+ printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
+ }
+}
+
+static int
+lcore_worker(struct lcore_params *p)
+{
+ struct rte_distributor *d = p->d;
+ const unsigned id = p->worker_id;
+ /*
+ * for single port, xor_val will be zero so we won't modify the output
+ * port, otherwise we send traffic from 0 to 1, 2 to 3, and vice versa
+ */
+ const unsigned xor_val = (rte_eth_dev_count() > 1);
+ struct rte_mbuf *buf = NULL;
+
+ printf("\nCore %u acting as worker core.\n", rte_lcore_id());
+ while (!quit_signal) {
+ buf = rte_distributor_get_pkt(d, id, buf);
+ buf->port ^= xor_val;
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+ prgname);
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string; return 0 on error, since the caller
+ * stores the result in an unsigned mask and checks it against 0 */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+
+ if (pm == 0)
+ return 0;
+
+ return pm;
+}
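+
+/*
+ * e.g. "-p 0x3" enables ports 0 and 1, "-p f" enables ports 0-3
+ * (the mask is parsed as hexadecimal).
+ */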
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind <= 1) {
+ print_usage(prgname);
+ return -1;
+ }
+
+ argv[optind-1] = prgname;
+
+ optind = 0; /* reset getopt lib */
+ return 0;
+}
+
+/* Main function, does initialization and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ struct rte_distributor *d;
+ struct rte_ring *output_ring;
+ unsigned lcore_id, worker_id = 0;
+ unsigned nb_ports;
+ uint8_t portid;
+ uint8_t nb_ports_available;
+
+ /* catch ctrl-c so we can print on exit */
+ signal(SIGINT, int_handler);
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid distributor parameters\n");
+
+ if (rte_lcore_count() < 3)
+ rte_exit(EXIT_FAILURE, "Error, This application needs at "
+ "least 3 logical cores to run:\n"
+ "1 lcore for packet RX and distribution\n"
+ "1 lcore for packet TX\n"
+ "and at least 1 lcore for worker threads\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
+ if (nb_ports != 1 && (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
+ "when using a single port\n");
+
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+ nb_ports_available = nb_ports;
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... done\n", (unsigned) portid);
+
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
+ portid);
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ d = rte_distributor_create("PKT_DIST", rte_socket_id(),
+ rte_lcore_count() - 2);
+ if (d == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create distributor\n");
+
+ /*
+ * scheduler ring is read only by the transmitter core, but written to
+ * by multiple threads
+ */
+ output_ring = rte_ring_create("Output_ring", RTE_RING_SZ,
+ rte_socket_id(), RING_F_SC_DEQ);
+ if (output_ring == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create output ring\n");
+
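+ /*
+ * Launch the remaining lcores: the last slave lcore becomes the TX
+ * thread draining output_ring, every other slave runs a worker, and
+ * the master core (below) runs the RX/distribution loop.
+ */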
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (worker_id == rte_lcore_count() - 2)
+ rte_eal_remote_launch((lcore_function_t *)lcore_tx,
+ output_ring, lcore_id);
+ else {
+ struct lcore_params *p =
+ rte_malloc(NULL, sizeof(*p), 0);
+ if (!p)
+ rte_panic("malloc failure\n");
+ *p = (struct lcore_params){worker_id, d, output_ring, mbuf_pool};
+
+ rte_eal_remote_launch((lcore_function_t *)lcore_worker,
+ p, lcore_id);
+ }
+ worker_id++;
+ }
+ /* run the rx loop on the master core only */
+ struct lcore_params p = { 0, d, output_ring, mbuf_pool};
+ lcore_rx(&p);
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ print_stats();
+ return 0;
+}
diff --git a/examples/dpdk_qat/Makefile b/examples/dpdk_qat/Makefile
new file mode 100644
index 00000000..01d61bcf
--- /dev/null
+++ b/examples/dpdk_qat/Makefile
@@ -0,0 +1,93 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+ifeq ($(ICP_ROOT),)
+$(error "Please define ICP_ROOT environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+LBITS := $(shell uname -p)
+ifeq ($(CROSS_COMPILE),)
+ ifneq ($(CONFIG_RTE_ARCH),"x86_64")
+ ifneq ($(LBITS),i686)
+ $(error The RTE_TARGET chosen is not compatible with this environment \
+ (x86_64), for this application. Please change the definition of the \
+ RTE_TARGET environment variable, or run the application on an i686 OS)
+ endif
+ endif
+endif
+
+# binary name
+APP = dpdk_qat
+
+# all source are stored in SRCS-y
+SRCS-y := main.c crypto.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -I$(ICP_ROOT)/quickassist/include \
+ -I$(ICP_ROOT)/quickassist/include/lac \
+ -I$(ICP_ROOT)/quickassist/lookaside/access_layer/include
+
+# From CRF 1.2 driver, library was renamed to libicp_qa_al.a
+ifneq ($(wildcard $(ICP_ROOT)/build/icp_qa_al.a),)
+ICP_LIBRARY_PATH = $(ICP_ROOT)/build/icp_qa_al.a
+else
+ICP_LIBRARY_PATH = $(ICP_ROOT)/build/libicp_qa_al.a
+endif
+
+LDLIBS += -L$(ICP_ROOT)/build
+LDLIBS += $(ICP_LIBRARY_PATH) \
+ -lz \
+ -losal \
+ -ladf_proxy \
+ -lcrypto
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf b/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf
new file mode 100644
index 00000000..fd139e2f
--- /dev/null
+++ b/examples/dpdk_qat/config_files/coleto/dh895xcc_qa_dev0.conf
@@ -0,0 +1,65 @@
+[GENERAL]
+ServicesEnabled = cy;dc
+ConfigVersion = 2
+cyHmacAuthMode = 1
+dcTotalSRAMAvailable = 0
+Firmware_MofPath = dh895xcc/mof_firmware.bin
+Firmware_MmpPath = dh895xcc/mmp_firmware.bin
+statsGeneral = 1
+statsDc = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+SRIOV_Enabled = 0
+ProcDebug = 1
+
+[KERNEL]
+NumberCyInstances = 0
+NumberDcInstances = 0
+
+[SSL]
+NumberCyInstances = 8
+NumberDcInstances = 0
+NumProcesses = 1
+LimitDevAccess = 0
+
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+Cy0CoreAffinity = 0
+
+Cy1Name = "SSL1"
+Cy1IsPolled = 1
+Cy1CoreAffinity = 1
+
+Cy2Name = "SSL2"
+Cy2IsPolled = 1
+Cy2CoreAffinity = 2
+
+Cy3Name = "SSL3"
+Cy3IsPolled = 1
+Cy3CoreAffinity = 3
+
+
+Cy4Name = "SSL4"
+Cy4IsPolled = 1
+Cy4CoreAffinity = 4
+
+
+Cy5Name = "SSL5"
+Cy5IsPolled = 1
+Cy5CoreAffinity = 5
+
+Cy6Name = "SSL6"
+Cy6IsPolled = 1
+Cy6CoreAffinity = 6
+
+
+Cy7Name = "SSL7"
+Cy7IsPolled = 1
+Cy7CoreAffinity = 7
diff --git a/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf
new file mode 100644
index 00000000..9e1c1d11
--- /dev/null
+++ b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev0.conf
@@ -0,0 +1,293 @@
+#########################################################################
+#
+# @par
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# #########################################################################
+# ########################################################
+#
+# This file is the configuration for a single dh89xxcc_qa
+# device.
+#
+# Each device has up to two accelerators.
+# - The client may load balance between these
+# accelerators.
+# Each accelerator has 8 independent ring banks.
+# - The interrupt for each can be directed to a
+# specific core.
+# Each ring bank has 16 rings (hardware assisted queues).
+#
+#########################################################
+# General Section
+##############################################
+
+[GENERAL]
+ServicesEnabled = cy0;cy1
+
+# Use version 2 of the config file
+ConfigVersion = 2
+# Look Aside Cryptographic Configuration
+cyHmacAuthMode = 1
+
+# Look Aside Compression Configuration
+dcTotalSRAMAvailable = 0
+
+# Firmware Location Configuration
+Firmware_MofPath = mof_firmware.bin
+Firmware_MmpPath = mmp_firmware.bin
+
+#Default values for number of concurrent requests
+CyNumConcurrentSymRequests = 512
+CyNumConcurrentAsymRequests = 64
+DcNumConcurrentRequests = 512
+
+#Statistics, valid values: 1,0
+statsGeneral = 1
+statsDc = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+
+# Enables or disables Single Root Complex IO Virtualization.
+# If this is enabled (1) then SRIOV and VT-d need to be enabled in
+# BIOS and there can be no Cy or Dc instances created in PF (Dom0).
+# If this is disabled (0) then SRIOV and VT-d need to be disabled
+# in BIOS and Cy and/or Dc instances can be used in PF (Dom0)
+SRIOV_Enabled = 0
+
+#Debug feature, if set to 1 it enables additional entries in /proc filesystem
+ProcDebug = 1
+
+#######################################################
+#
+# Logical Instances Section
+# A logical instance allows each address domain
+# (kernel space and individual user space processes)
+# to configure rings (i.e. hardware assisted queues)
+# to be used by that address domain and to define the
+# behavior of that ring.
+#
+# The address domains are in the following format
+# - For kernel address domains
+# [KERNEL]
+# - For user process address domains
+# [xxxxx]
+# Where xxxxx may be any ascii value which uniquely identifies
+# the user mode process.
+# To allow the driver to correctly configure the
+# logical instances associated with this user process,
+# the process must call icp_sal_userStartMultiProcess(...),
+# passing the xxxxx string during process initialisation.
+# When the user space process is finished it must call
+# icp_sal_userStop(...) to free resources.
+# NumProcesses will indicate the maximum number of processes
+# that can call icp_sal_userStartMultiProcess on this instance.
+# Warning: the resources are preallocated: if NumProcesses
+# is too high, the driver will fail to load
+#
+# Items configurable by a logical instance are:
+# - Name of the logical instance
+# - The accelerator associated with this logical
+# instance
+# - The core the instance is affinitized to (optional)
+#
+# Note: Logical instances may not share the same ring, but
+# may share a ring bank.
+#
+# The format of the logical instances are:
+# - For crypto:
+# Cy<n>Name = "xxxx"
+# Cy<n>AcceleratorNumber = 0-3
+# Cy<n>CoreAffinity = 0-7
+#
+# - For Data Compression
+# Dc<n>Name = "xxxx"
+# Dc<n>AcceleratorNumber = 0-1
+# Dc<n>CoreAffinity = 0-7
+#
+# Where:
+# - n is the number of this logical instance starting at 0.
+# - xxxx may be any ascii value which identifies the logical instance.
+#
+# Note: for user space processes, a list of values can be specified for
+# the accelerator number and the core affinity: for example
+# Cy0AcceleratorNumber = 0,2
+# Cy0CoreAffinity = 0,2,4
+# These comma-separated lists will allow the multiple processes to use
+# different accelerators and cores, and will wrap around the numbers
+# in the list. In the above example, process 0 will use accelerator 0,
+# and process 1 will use accelerator 2
+#
+########################################################
+
+##############################################
+# Kernel Instances Section
+##############################################
+[KERNEL]
+NumberCyInstances = 0
+NumberDcInstances = 0
+
+##############################################
+# User Process Instance Section
+##############################################
+[SSL]
+NumberCyInstances = 16
+NumberDcInstances = 0
+NumProcesses = 1
+LimitDevAccess = 0
+
+# Crypto - User instance #0
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+Cy0AcceleratorNumber = 0
+# List of core affinities
+Cy0CoreAffinity = 0
+
+# Crypto - User instance #1
+Cy1Name = "SSL1"
+Cy1IsPolled = 1
+Cy1AcceleratorNumber = 1
+# List of core affinities
+Cy1CoreAffinity = 1
+
+# Crypto - User instance #2
+Cy2Name = "SSL2"
+Cy2IsPolled = 1
+Cy2AcceleratorNumber = 2
+# List of core affinities
+Cy2CoreAffinity = 2
+
+# Crypto - User instance #3
+Cy3Name = "SSL3"
+Cy3IsPolled = 1
+Cy3AcceleratorNumber = 3
+# List of core affinities
+Cy3CoreAffinity = 3
+
+# Crypto - User instance #4
+Cy4Name = "SSL4"
+Cy4IsPolled = 1
+Cy4AcceleratorNumber = 0
+# List of core affinities
+Cy4CoreAffinity = 4
+
+# Crypto - User instance #5
+Cy5Name = "SSL5"
+Cy5IsPolled = 1
+Cy5AcceleratorNumber = 1
+# List of core affinities
+Cy5CoreAffinity = 5
+
+# Crypto - User instance #6
+Cy6Name = "SSL6"
+Cy6IsPolled = 1
+Cy6AcceleratorNumber = 2
+# List of core affinities
+Cy6CoreAffinity = 6
+
+# Crypto - User instance #7
+Cy7Name = "SSL7"
+Cy7IsPolled = 1
+Cy7AcceleratorNumber = 3
+# List of core affinities
+Cy7CoreAffinity = 7
+
+# Crypto - User instance #8
+Cy8Name = "SSL8"
+Cy8IsPolled = 1
+Cy8AcceleratorNumber = 0
+# List of core affinities
+Cy8CoreAffinity = 16
+
+# Crypto - User instance #9
+Cy9Name = "SSL9"
+Cy9IsPolled = 1
+Cy9AcceleratorNumber = 1
+# List of core affinities
+Cy9CoreAffinity = 17
+
+# Crypto - User instance #10
+Cy10Name = "SSL10"
+Cy10IsPolled = 1
+Cy10AcceleratorNumber = 2
+# List of core affinities
+Cy10CoreAffinity = 18
+
+# Crypto - User instance #11
+Cy11Name = "SSL11"
+Cy11IsPolled = 1
+Cy11AcceleratorNumber = 3
+# List of core affinities
+Cy11CoreAffinity = 19
+
+# Crypto - User instance #12
+Cy12Name = "SSL12"
+Cy12IsPolled = 1
+Cy12AcceleratorNumber = 0
+# List of core affinities
+Cy12CoreAffinity = 20
+
+# Crypto - User instance #13
+Cy13Name = "SSL13"
+Cy13IsPolled = 1
+Cy13AcceleratorNumber = 1
+# List of core affinities
+Cy13CoreAffinity = 21
+
+# Crypto - User instance #14
+Cy14Name = "SSL14"
+Cy14IsPolled = 1
+Cy14AcceleratorNumber = 2
+# List of core affinities
+Cy14CoreAffinity = 22
+
+# Crypto - User instance #15
+Cy15Name = "SSL15"
+Cy15IsPolled = 1
+Cy15AcceleratorNumber = 3
+# List of core affinities
+Cy15CoreAffinity = 23
+
+
+
+##############################################
+# Wireless Process Instance Section
+##############################################
+[WIRELESS]
+NumberCyInstances = 0
+NumberDcInstances = 0
+NumProcesses = 0
diff --git a/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf
new file mode 100644
index 00000000..3e8d8b6b
--- /dev/null
+++ b/examples/dpdk_qat/config_files/shumway/dh89xxcc_qa_dev1.conf
@@ -0,0 +1,292 @@
+#########################################################################
+#
+# @par
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# #########################################################################
+# ########################################################
+#
+# This file is the configuration for a single dh89xxcc_qa
+# device.
+#
+# Each device has up to two accelerators.
+# - The client may load balance between these
+# accelerators.
+# Each accelerator has 8 independent ring banks.
+# - The interrupt for each can be directed to a
+# specific core.
+# Each ring bank has 16 rings (hardware assisted queues).
+#
+#########################################################
+# General Section
+##############################################
+
+[GENERAL]
+ServicesEnabled = cy0;cy1
+
+# Use version 2 of the config file
+ConfigVersion = 2
+# Look Aside Cryptographic Configuration
+cyHmacAuthMode = 1
+
+# Look Aside Compression Configuration
+dcTotalSRAMAvailable = 0
+
+# Firmware Location Configuration
+Firmware_MofPath = mof_firmware.bin
+Firmware_MmpPath = mmp_firmware.bin
+
+#Default values for number of concurrent requests
+CyNumConcurrentSymRequests = 512
+CyNumConcurrentAsymRequests = 64
+DcNumConcurrentRequests = 512
+
+#Statistics, valid values: 1,0
+statsGeneral = 1
+statsDc = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+
+# Enables or disables Single Root Complex IO Virtualization.
+# If this is enabled (1) then SRIOV and VT-d need to be enabled in
+# BIOS and there can be no Cy or Dc instances created in PF (Dom0).
+# If this is disabled (0) then SRIOV and VT-d need to be disabled
+# in BIOS and Cy and/or Dc instances can be used in PF (Dom0)
+SRIOV_Enabled = 0
+
+#Debug feature, if set to 1 it enables additional entries in /proc filesystem
+ProcDebug = 1
+
+#######################################################
+#
+# Logical Instances Section
+# A logical instance allows each address domain
+# (kernel space and individual user space processes)
+# to configure rings (i.e. hardware assisted queues)
+# to be used by that address domain and to define the
+# behavior of that ring.
+#
+# The address domains are in the following format
+# - For kernel address domains
+# [KERNEL]
+# - For user process address domains
+# [xxxxx]
+# Where xxxxx may be any ascii value which uniquely identifies
+# the user mode process.
+# To allow the driver to correctly configure the
+# logical instances associated with this user process,
+# the process must call icp_sal_userStartMultiProcess(...),
+# passing the xxxxx string during process initialisation.
+# When the user space process is finished it must call
+# icp_sal_userStop(...) to free resources.
+# NumProcesses will indicate the maximum number of processes
+# that can call icp_sal_userStartMultiProcess on this instance.
+# Warning: the resources are preallocated: if NumProcesses
+# is too high, the driver will fail to load
+#
+# Items configurable by a logical instance are:
+# - Name of the logical instance
+# - The accelerator associated with this logical
+# instance
+# - The core the instance is affinitized to (optional)
+#
+# Note: Logical instances may not share the same ring, but
+# may share a ring bank.
+#
+# The format of the logical instances are:
+# - For crypto:
+# Cy<n>Name = "xxxx"
+# Cy<n>AcceleratorNumber = 0-3
+# Cy<n>CoreAffinity = 0-7
+#
+# - For Data Compression
+# Dc<n>Name = "xxxx"
+# Dc<n>AcceleratorNumber = 0-1
+# Dc<n>CoreAffinity = 0-7
+#
+# Where:
+# - n is the number of this logical instance starting at 0.
+# - xxxx may be any ascii value which identifies the logical instance.
+#
+# Note: for user space processes, a list of values can be specified for
+# the accelerator number and the core affinity: for example
+# Cy0AcceleratorNumber = 0,2
+# Cy0CoreAffinity = 0,2,4
+# These comma-separated lists will allow the multiple processes to use
+# different accelerators and cores, and will wrap around the numbers
+# in the list. In the above example, process 0 will use accelerator 0,
+# and process 1 will use accelerator 2
+#
+########################################################
+
+##############################################
+# Kernel Instances Section
+##############################################
+[KERNEL]
+NumberCyInstances = 0
+NumberDcInstances = 0
+
+##############################################
+# User Process Instance Section
+##############################################
+[SSL]
+NumberCyInstances = 16
+NumberDcInstances = 0
+NumProcesses = 1
+LimitDevAccess = 0
+
+# Crypto - User instance #0
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+Cy0AcceleratorNumber = 0
+# List of core affinities
+Cy0CoreAffinity = 8
+
+# Crypto - User instance #1
+Cy1Name = "SSL1"
+Cy1IsPolled = 1
+Cy1AcceleratorNumber = 1
+# List of core affinities
+Cy1CoreAffinity = 9
+
+# Crypto - User instance #2
+Cy2Name = "SSL2"
+Cy2IsPolled = 1
+Cy2AcceleratorNumber = 2
+# List of core affinities
+Cy2CoreAffinity = 10
+
+# Crypto - User instance #3
+Cy3Name = "SSL3"
+Cy3IsPolled = 1
+Cy3AcceleratorNumber = 3
+# List of core affinities
+Cy3CoreAffinity = 11
+
+# Crypto - User instance #4
+Cy4Name = "SSL4"
+Cy4IsPolled = 1
+Cy4AcceleratorNumber = 0
+# List of core affinities
+Cy4CoreAffinity = 12
+
+# Crypto - User instance #5
+Cy5Name = "SSL5"
+Cy5IsPolled = 1
+Cy5AcceleratorNumber = 1
+# List of core affinities
+Cy5CoreAffinity = 13
+
+# Crypto - User instance #6
+Cy6Name = "SSL6"
+Cy6IsPolled = 1
+Cy6AcceleratorNumber = 2
+# List of core affinities
+Cy6CoreAffinity = 14
+
+# Crypto - User instance #7
+Cy7Name = "SSL7"
+Cy7IsPolled = 1
+Cy7AcceleratorNumber = 3
+# List of core affinities
+Cy7CoreAffinity = 15
+
+# Crypto - User instance #8
+Cy8Name = "SSL8"
+Cy8IsPolled = 1
+Cy8AcceleratorNumber = 0
+# List of core affinities
+Cy8CoreAffinity = 24
+
+# Crypto - User instance #9
+Cy9Name = "SSL9"
+Cy9IsPolled = 1
+Cy9AcceleratorNumber = 1
+# List of core affinities
+Cy9CoreAffinity = 25
+
+# Crypto - User instance #10
+Cy10Name = "SSL10"
+Cy10IsPolled = 1
+Cy10AcceleratorNumber = 2
+# List of core affinities
+Cy10CoreAffinity = 26
+
+# Crypto - User instance #11
+Cy11Name = "SSL11"
+Cy11IsPolled = 1
+Cy11AcceleratorNumber = 3
+# List of core affinities
+Cy11CoreAffinity = 27
+
+# Crypto - User instance #12
+Cy12Name = "SSL12"
+Cy12IsPolled = 1
+Cy12AcceleratorNumber = 0
+# List of core affinities
+Cy12CoreAffinity = 28
+
+# Crypto - User instance #13
+Cy13Name = "SSL13"
+Cy13IsPolled = 1
+Cy13AcceleratorNumber = 1
+# List of core affinities
+Cy13CoreAffinity = 29
+
+# Crypto - User instance #14
+Cy14Name = "SSL14"
+Cy14IsPolled = 1
+Cy14AcceleratorNumber = 2
+# List of core affinities
+Cy14CoreAffinity = 30
+
+# Crypto - User instance #15
+Cy15Name = "SSL15"
+Cy15IsPolled = 1
+Cy15AcceleratorNumber = 3
+# List of core affinities
+Cy15CoreAffinity = 31
+
+
+##############################################
+# Wireless Process Instance Section
+##############################################
+[WIRELESS]
+NumberCyInstances = 0
+NumberDcInstances = 0
+NumProcesses = 0
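The [SSL] section name in these files is the address-domain string that a user-space process hands to the QAT service access layer at start-up; crypto.c later in this diff does exactly that from crypto_init(). A minimal, hedged sketch of that handshake (error handling trimmed; the CPA_FALSE argument mirrors LimitDevAccess = 0 above):

#include "cpa.h"
#include "icp_sal_user.h"

/* Sketch only: attach the calling process to the [SSL] section defined in
 * the config file, do the crypto work, then release the preallocated rings. */
static int ssl_section_attach(void)
{
	if (CPA_STATUS_SUCCESS != icp_sal_userStartMultiProcess("SSL", CPA_FALSE))
		return -1;	/* driver not loaded, or NumProcesses exhausted */

	/* ... instance lookup and cpaCySymDp* work happens here ... */

	icp_sal_userStop();	/* free the rings reserved for this process */
	return 0;
}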
diff --git a/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf b/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf
new file mode 100644
index 00000000..c3a85dea
--- /dev/null
+++ b/examples/dpdk_qat/config_files/stargo/dh89xxcc_qa_dev0.conf
@@ -0,0 +1,235 @@
+#########################################################################
+#
+# @par
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+# #########################################################################
+# ########################################################
+#
+# This file is the configuration for a single dh89xxcc_qa
+# device.
+#
+# Each device has up to two accelerators.
+# - The client may load balance between these
+# accelerators.
+# Each accelerator has 8 independent ring banks.
+# - The interrupt for each can be directed to a
+# specific core.
+# Each ring bank has 16 rings (hardware assisted queues).
+#
+#########################################################
+# General Section
+##############################################
+
+[GENERAL]
+ServicesEnabled = cy0;cy1
+
+# Use version 2 of the config file
+ConfigVersion = 2
+# Look Aside Cryptographic Configuration
+cyHmacAuthMode = 1
+
+# Look Aside Compression Configuration
+dcTotalSRAMAvailable = 0
+
+# Firmware Location Configuration
+Firmware_MofPath = mof_firmware.bin
+Firmware_MmpPath = mmp_firmware.bin
+
+# Default values for the number of concurrent requests
+CyNumConcurrentSymRequests = 512
+CyNumConcurrentAsymRequests = 64
+DcNumConcurrentRequests = 512
+
+#Statistics, valid values: 1,0
+statsGeneral = 1
+statsDc = 1
+statsDh = 1
+statsDrbg = 1
+statsDsa = 1
+statsEcc = 1
+statsKeyGen = 1
+statsLn = 1
+statsPrime = 1
+statsRsa = 1
+statsSym = 1
+
+# Enables or disables Single Root I/O Virtualization (SR-IOV).
+# If this is enabled (1) then SRIOV and VT-d need to be enabled in
+# BIOS and there can be no Cy or Dc instances created in PF (Dom0).
+# If this is disabled (0) then SRIOV and VT-d need to be disabled
+# in BIOS and Cy and/or Dc instances can be used in PF (Dom0).
+SRIOV_Enabled = 0
+
+# Debug feature; if set to 1 it enables additional entries in the /proc filesystem
+ProcDebug = 1
+
+#######################################################
+#
+# Logical Instances Section
+# A logical instance allows each address domain
+# (kernel space and individual user space processes)
+# to configure rings (i.e. hardware assisted queues)
+# to be used by that address domain and to define the
+# behavior of that ring.
+#
+# The address domains are in the following format
+# - For kernel address domains
+# [KERNEL]
+# - For user process address domains
+# [xxxxx]
+# Where xxxxx may be any ascii value which uniquely identifies
+# the user mode process.
+# To allow the driver to correctly configure the
+# logical instances associated with this user process,
+# the process must call icp_sal_userStartMultiProcess(...),
+# passing the xxxxx string during process initialisation.
+# When the user space process is finished it must call
+# icp_sal_userStop(...) to free resources.
+# NumProcesses will indicate the maximum number of processes
+# that can call icp_sal_userStartMultiProcess on this instance.
+# Warning: the resources are preallocated: if NumProcesses
+# is too high, the driver will fail to load
+#
+# Items configurable by a logical instance are:
+# - Name of the logical instance
+# - The accelerator associated with this logical
+# instance
+# - The core the instance is affinitized to (optional)
+#
+# Note: Logical instances may not share the same ring, but
+# may share a ring bank.
+#
+# The format of the logical instances is:
+# - For crypto:
+# Cy<n>Name = "xxxx"
+# Cy<n>AcceleratorNumber = 0-3
+# Cy<n>CoreAffinity = 0-7
+#
+# - For Data Compression
+# Dc<n>Name = "xxxx"
+# Dc<n>AcceleratorNumber = 0-1
+# Dc<n>CoreAffinity = 0-7
+#
+# Where:
+# - n is the number of this logical instance starting at 0.
+# - xxxx may be any ascii value which identifies the logical instance.
+#
+# Note: for user space processes, a list of values can be specified for
+# the accelerator number and the core affinity: for example
+# Cy0AcceleratorNumber = 0,2
+# Cy0CoreAffinity = 0,2,4
+# These comma-separated lists allow multiple processes to use
+# different accelerators and cores, wrapping around the numbers
+# in the list. In the above example, process 0 will use accelerator 0,
+# and process 1 will use accelerator 2.
+#
+########################################################
+
+##############################################
+# Kernel Instances Section
+##############################################
+[KERNEL]
+NumberCyInstances = 0
+NumberDcInstances = 0
+
+##############################################
+# User Process Instance Section
+##############################################
+[SSL]
+NumberCyInstances = 8
+NumberDcInstances = 0
+NumProcesses = 1
+LimitDevAccess = 0
+
+# Crypto - User instance #0
+Cy0Name = "SSL0"
+Cy0IsPolled = 1
+Cy0AcceleratorNumber = 0
+# List of core affinities
+Cy0CoreAffinity = 0
+
+# Crypto - User instance #1
+Cy1Name = "SSL1"
+Cy1IsPolled = 1
+Cy1AcceleratorNumber = 1
+# List of core affinities
+Cy1CoreAffinity = 1
+
+# Crypto - User instance #2
+Cy2Name = "SSL2"
+Cy2IsPolled = 1
+Cy2AcceleratorNumber = 2
+# List of core affinities
+Cy2CoreAffinity = 2
+
+# Crypto - User instance #3
+Cy3Name = "SSL3"
+Cy3IsPolled = 1
+Cy3AcceleratorNumber = 3
+# List of core affinities
+Cy3CoreAffinity = 3
+
+# Crypto - User instance #4
+Cy4Name = "SSL4"
+Cy4IsPolled = 1
+Cy4AcceleratorNumber = 0
+# List of core affinities
+Cy4CoreAffinity = 4
+
+# Crypto - User instance #5
+Cy5Name = "SSL5"
+Cy5IsPolled = 1
+Cy5AcceleratorNumber = 1
+# List of core affinities
+Cy5CoreAffinity = 5
+
+# Crypto - User instance #6
+Cy6Name = "SSL6"
+Cy6IsPolled = 1
+Cy6AcceleratorNumber = 2
+# List of core affinities
+Cy6CoreAffinity = 6
+
+# Crypto - User instance #7
+Cy7Name = "SSL7"
+Cy7IsPolled = 1
+Cy7AcceleratorNumber = 3
+# List of core affinities
+Cy7CoreAffinity = 7
+
+##############################################
+# Wireless Process Instance Section
+##############################################
+[WIRELESS]
+NumberCyInstances = 0
+NumberDcInstances = 0
+NumProcesses = 0
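The eight instances above follow a simple pattern: accelerator numbers cycle through 0-3 and each instance is pinned to one core, which is how crypto.c later matches an instance to the lcore that drives it. Extending the section keeps the same shape; a purely illustrative sketch of a ninth, polled instance (the accelerator and core values are invented for the example, not taken from a shipped config, and the core affinity would normally be chosen to match an enabled lcore):

NumberCyInstances = 9

# Crypto - User instance #8 (illustrative only)
Cy8Name = "SSL8"
Cy8IsPolled = 1
Cy8AcceleratorNumber = 0
Cy8CoreAffinity = 0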
diff --git a/examples/dpdk_qat/crypto.c b/examples/dpdk_qat/crypto.c
new file mode 100644
index 00000000..8954bf87
--- /dev/null
+++ b/examples/dpdk_qat/crypto.c
@@ -0,0 +1,944 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <string.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_ether.h>
+#include <rte_malloc.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+
+#define CPA_CY_SYM_DP_TMP_WORKAROUND 1
+
+#include "cpa.h"
+#include "cpa_types.h"
+#include "cpa_cy_sym_dp.h"
+#include "cpa_cy_common.h"
+#include "cpa_cy_im.h"
+#include "icp_sal_user.h"
+#include "icp_sal_poll.h"
+
+#include "crypto.h"
+
+/* CIPHER KEY LENGTHS */
+#define KEY_SIZE_64_IN_BYTES (64 / 8)
+#define KEY_SIZE_56_IN_BYTES (56 / 8)
+#define KEY_SIZE_128_IN_BYTES (128 / 8)
+#define KEY_SIZE_168_IN_BYTES (168 / 8)
+#define KEY_SIZE_192_IN_BYTES (192 / 8)
+#define KEY_SIZE_256_IN_BYTES (256 / 8)
+
+/* HMAC AUTH KEY LENGTHS */
+#define AES_XCBC_AUTH_KEY_LENGTH_IN_BYTES (128 / 8)
+#define SHA1_AUTH_KEY_LENGTH_IN_BYTES (160 / 8)
+#define SHA224_AUTH_KEY_LENGTH_IN_BYTES (224 / 8)
+#define SHA256_AUTH_KEY_LENGTH_IN_BYTES (256 / 8)
+#define SHA384_AUTH_KEY_LENGTH_IN_BYTES (384 / 8)
+#define SHA512_AUTH_KEY_LENGTH_IN_BYTES (512 / 8)
+#define MD5_AUTH_KEY_LENGTH_IN_BYTES (128 / 8)
+#define KASUMI_AUTH_KEY_LENGTH_IN_BYTES (128 / 8)
+
+/* HASH DIGEST LENGTHS */
+#define AES_XCBC_DIGEST_LENGTH_IN_BYTES (128 / 8)
+#define AES_XCBC_96_DIGEST_LENGTH_IN_BYTES (96 / 8)
+#define MD5_DIGEST_LENGTH_IN_BYTES (128 / 8)
+#define SHA1_DIGEST_LENGTH_IN_BYTES (160 / 8)
+#define SHA1_96_DIGEST_LENGTH_IN_BYTES (96 / 8)
+#define SHA224_DIGEST_LENGTH_IN_BYTES (224 / 8)
+#define SHA256_DIGEST_LENGTH_IN_BYTES (256 / 8)
+#define SHA384_DIGEST_LENGTH_IN_BYTES (384 / 8)
+#define SHA512_DIGEST_LENGTH_IN_BYTES (512 / 8)
+#define KASUMI_DIGEST_LENGTH_IN_BYTES (32 / 8)
+
+#define IV_LENGTH_16_BYTES (16)
+#define IV_LENGTH_8_BYTES (8)
+
+
+/*
+ * rte_memzone is used to allocate physically contiguous virtual memory.
+ * In this application we allocate a single block and divide it between variables
+ * which require a virtual to physical mapping for use by the QAT driver.
+ * Virt2phys is only performed during initialisation and not on the data-path.
+ */
+
+#define LCORE_MEMZONE_SIZE (1 << 22)
+
+struct lcore_memzone
+{
+ const struct rte_memzone *memzone;
+ void *next_free_address;
+};
+
+/*
+ * Size the qa software response queue.
+ * Note: Head and Tail are 8 bit, therefore, the queue is
+ * fixed to 256 entries.
+ */
+#define CRYPTO_SOFTWARE_QUEUE_SIZE 256
+
+struct qa_callbackQueue {
+ uint8_t head;
+ uint8_t tail;
+ uint16_t numEntries;
+ struct rte_mbuf *qaCallbackRing[CRYPTO_SOFTWARE_QUEUE_SIZE];
+};
+
+struct qa_core_conf {
+ CpaCySymDpSessionCtx *encryptSessionHandleTbl[NUM_CRYPTO][NUM_HMAC];
+ CpaCySymDpSessionCtx *decryptSessionHandleTbl[NUM_CRYPTO][NUM_HMAC];
+ CpaInstanceHandle instanceHandle;
+ struct qa_callbackQueue callbackQueue;
+ uint64_t qaOutstandingRequests;
+ uint64_t numResponseAttempts;
+ uint8_t kickFreq;
+ void *pPacketIV;
+ CpaPhysicalAddr packetIVPhy;
+ struct lcore_memzone lcoreMemzone;
+} __rte_cache_aligned;
+
+#define MAX_CORES (RTE_MAX_LCORE)
+
+static struct qa_core_conf qaCoreConf[MAX_CORES];
+
+/*
+ * Create keys of the maximum possible size,
+ * one for cipher and one for hash.
+ */
+struct glob_keys {
+ uint8_t cipher_key[32];
+ uint8_t hash_key[64];
+ uint8_t iv[16];
+};
+
+struct glob_keys g_crypto_hash_keys = {
+ .cipher_key = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,
+ 0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,
+ 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20},
+ .hash_key = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10,
+ 0x11,0x12,0x13,0x14,0x15,0x16,0x17,0x18,
+ 0x19,0x1a,0x1b,0x1c,0x1d,0x1e,0x1f,0x20,
+ 0x21,0x22,0x23,0x24,0x25,0x26,0x27,0x28,
+ 0x29,0x2a,0x2b,0x2c,0x2d,0x2e,0x2f,0x30,
+ 0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,
+ 0x39,0x4a,0x4b,0x4c,0x4d,0x4e,0x4f,0x50},
+ .iv = {0x01,0x02,0x03,0x04,0x05,0x06,0x07,0x08,
+ 0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f,0x10}
+};
+
+/*
+ * Offsets from the start of the packet.
+ *
+ */
+#define PACKET_DATA_START_PHYS(p) \
+ ((p)->buf_physaddr + (p)->data_off)
+
+/*
+ * A fixed offset to where the crypto is to be performed, which is the first
+ * byte after the Ethernet (14 bytes) and IPv4 (20 bytes) headers.
+ */
+#define CRYPTO_START_OFFSET (14+20)
+#define HASH_START_OFFSET (14+20)
+#define CIPHER_BLOCK_DEFAULT_SIZE (16)
+#define HASH_BLOCK_DEFAULT_SIZE (16)
+
+/*
+ * Offset to the opdata from the start of the data portion of the packet.
+ * Assumption: The buffer is physically contiguous.
+ * +18 takes this to the next cache line.
+ */
+
+#define CRYPTO_OFFSET_TO_OPDATA (ETHER_MAX_LEN+18)
+
+/*
+ * Default number of requests to place on the hardware ring before kicking the
+ * ring pointers.
+ */
+#define CRYPTO_BURST_TX (16)
+
+/*
+ * Only call the qa poll function when the number of responses in the software
+ * queue drops below this number.
+ */
+#define CRYPTO_QUEUED_RESP_POLL_THRESHOLD (32)
+
+/*
+ * Limit the number of polls per call to get_next_response.
+ */
+#define GET_NEXT_RESPONSE_FREQ (32)
+
+/*
+ * Max number of responses to pull from the qa in one poll.
+ */
+#define CRYPTO_MAX_RESPONSE_QUOTA \
+ (CRYPTO_SOFTWARE_QUEUE_SIZE-CRYPTO_QUEUED_RESP_POLL_THRESHOLD-1)
+
+#if (CRYPTO_QUEUED_RESP_POLL_THRESHOLD + CRYPTO_MAX_RESPONSE_QUOTA >= \
+ CRYPTO_SOFTWARE_QUEUE_SIZE)
+#error It is possible to overflow the qa response Q with current poll and \
+ response quota.
+#endif
+
+static void
+crypto_callback(CpaCySymDpOpData *pOpData,
+ __rte_unused CpaStatus status,
+ __rte_unused CpaBoolean verifyResult)
+{
+ uint32_t lcore_id;
+ lcore_id = rte_lcore_id();
+ struct qa_callbackQueue *callbackQ = &(qaCoreConf[lcore_id].callbackQueue);
+
+ /*
+ * Received a completion from the QA hardware.
+ * Place the response on the return queue.
+ */
+ callbackQ->qaCallbackRing[callbackQ->head] = pOpData->pCallbackTag;
+ callbackQ->head++;
+ callbackQ->numEntries++;
+ qaCoreConf[lcore_id].qaOutstandingRequests--;
+}
+
+static void
+qa_crypto_callback(CpaCySymDpOpData *pOpData, CpaStatus status,
+ CpaBoolean verifyResult)
+{
+ crypto_callback(pOpData, status, verifyResult);
+}
+
+/*
+ * Each allocation from a particular memzone lasts for the life-time of
+ * the application. No freeing of previous allocations will occur.
+ */
+static void *
+alloc_memzone_region(uint32_t length, uint32_t lcore_id)
+{
+ char *current_free_addr_ptr = NULL;
+ struct lcore_memzone *lcore_memzone = &(qaCoreConf[lcore_id].lcoreMemzone);
+
+ current_free_addr_ptr = lcore_memzone->next_free_address;
+
+ if (current_free_addr_ptr + length >=
+ (char *)lcore_memzone->memzone->addr + lcore_memzone->memzone->len) {
+ printf("Crypto: No memory available in memzone\n");
+ return NULL;
+ }
+ lcore_memzone->next_free_address = current_free_addr_ptr + length;
+
+ return (void *)current_free_addr_ptr;
+}
+
+/*
+ * Virtual to Physical Address translation is only executed during initialization
+ * and not on the data-path.
+ */
+static CpaPhysicalAddr
+qa_v2p(void *ptr)
+{
+ const struct rte_memzone *memzone = NULL;
+ uint32_t lcore_id = 0;
+ RTE_LCORE_FOREACH(lcore_id) {
+ memzone = qaCoreConf[lcore_id].lcoreMemzone.memzone;
+
+ if ((char*) ptr >= (char *) memzone->addr &&
+ (char*) ptr < ((char*) memzone->addr + memzone->len)) {
+ return (CpaPhysicalAddr)
+ (memzone->phys_addr + ((char *) ptr - (char*) memzone->addr));
+ }
+ }
+ printf("Crypto: Corresponding physical address not found in memzone\n");
+ return (CpaPhysicalAddr) 0;
+}
+
+static CpaStatus
+getCoreAffinity(Cpa32U *coreAffinity, const CpaInstanceHandle instanceHandle)
+{
+ CpaInstanceInfo2 info;
+ Cpa16U i = 0;
+ CpaStatus status = CPA_STATUS_SUCCESS;
+
+ memset(&info, 0, sizeof(CpaInstanceInfo2));
+
+ status = cpaCyInstanceGetInfo2(instanceHandle, &info);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: Error getting instance info\n");
+ return CPA_STATUS_FAIL;
+ }
+ for (i = 0; i < MAX_CORES; i++) {
+ if (CPA_BITMAP_BIT_TEST(info.coreAffinity, i)) {
+ *coreAffinity = i;
+ return CPA_STATUS_SUCCESS;
+ }
+ }
+ return CPA_STATUS_FAIL;
+}
+
+static CpaStatus
+get_crypto_instance_on_core(CpaInstanceHandle *pInstanceHandle,
+ uint32_t lcore_id)
+{
+ Cpa16U numInstances = 0, i = 0;
+ CpaStatus status = CPA_STATUS_FAIL;
+ CpaInstanceHandle *pLocalInstanceHandles = NULL;
+ Cpa32U coreAffinity = 0;
+
+ status = cpaCyGetNumInstances(&numInstances);
+ if (CPA_STATUS_SUCCESS != status || numInstances == 0) {
+ return CPA_STATUS_FAIL;
+ }
+
+ pLocalInstanceHandles = rte_malloc("pLocalInstanceHandles",
+ sizeof(CpaInstanceHandle) * numInstances, RTE_CACHE_LINE_SIZE);
+
+ if (NULL == pLocalInstanceHandles) {
+ return CPA_STATUS_FAIL;
+ }
+ status = cpaCyGetInstances(numInstances, pLocalInstanceHandles);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: cpaCyGetInstances failed with status: %"PRId32"\n", status);
+ rte_free((void *) pLocalInstanceHandles);
+ return CPA_STATUS_FAIL;
+ }
+
+ for (i = 0; i < numInstances; i++) {
+ status = getCoreAffinity(&coreAffinity, pLocalInstanceHandles[i]);
+ if (CPA_STATUS_SUCCESS != status) {
+ rte_free((void *) pLocalInstanceHandles);
+ return CPA_STATUS_FAIL;
+ }
+ if (coreAffinity == lcore_id) {
+ printf("Crypto: instance found on core %d\n", i);
+ *pInstanceHandle = pLocalInstanceHandles[i];
+ return CPA_STATUS_SUCCESS;
+ }
+ }
+ /* core affinity not found */
+ rte_free((void *) pLocalInstanceHandles);
+ return CPA_STATUS_FAIL;
+}
+
+static CpaStatus
+initCySymSession(const int pkt_cipher_alg,
+ const int pkt_hash_alg, const CpaCySymHashMode hashMode,
+ const CpaCySymCipherDirection crypto_direction,
+ CpaCySymSessionCtx **ppSessionCtx,
+ const CpaInstanceHandle cyInstanceHandle,
+ const uint32_t lcore_id)
+{
+ Cpa32U sessionCtxSizeInBytes = 0;
+ CpaStatus status = CPA_STATUS_FAIL;
+ CpaBoolean isCrypto = CPA_TRUE, isHmac = CPA_TRUE;
+ CpaCySymSessionSetupData sessionSetupData;
+
+ memset(&sessionSetupData, 0, sizeof(CpaCySymSessionSetupData));
+
+ /* Assumption: key length is set to each algorithm's max length */
+ switch (pkt_cipher_alg) {
+ case NO_CIPHER:
+ isCrypto = CPA_FALSE;
+ break;
+ case CIPHER_DES:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_DES_ECB;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_64_IN_BYTES;
+ break;
+ case CIPHER_DES_CBC:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_DES_CBC;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_64_IN_BYTES;
+ break;
+ case CIPHER_DES3:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_3DES_ECB;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_192_IN_BYTES;
+ break;
+ case CIPHER_DES3_CBC:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_3DES_CBC;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_192_IN_BYTES;
+ break;
+ case CIPHER_AES:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_AES_ECB;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_128_IN_BYTES;
+ break;
+ case CIPHER_AES_CBC_128:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_AES_CBC;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_128_IN_BYTES;
+ break;
+ case CIPHER_KASUMI_F8:
+ sessionSetupData.cipherSetupData.cipherAlgorithm =
+ CPA_CY_SYM_CIPHER_KASUMI_F8;
+ sessionSetupData.cipherSetupData.cipherKeyLenInBytes =
+ KEY_SIZE_128_IN_BYTES;
+ break;
+ default:
+ printf("Crypto: Undefined Cipher specified\n");
+ break;
+ }
+ /* Set the cipher direction */
+ if (isCrypto) {
+ sessionSetupData.cipherSetupData.cipherDirection = crypto_direction;
+ sessionSetupData.cipherSetupData.pCipherKey =
+ g_crypto_hash_keys.cipher_key;
+ sessionSetupData.symOperation = CPA_CY_SYM_OP_CIPHER;
+ }
+
+ /* Setup Hash common fields */
+ switch (pkt_hash_alg) {
+ case NO_HASH:
+ isHmac = CPA_FALSE;
+ break;
+ case HASH_AES_XCBC:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_XCBC;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ AES_XCBC_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_AES_XCBC_96:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_AES_XCBC;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ AES_XCBC_96_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_MD5:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_MD5;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ MD5_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA1:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA1_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA1_96:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA1;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA1_96_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA224:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA224;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA224_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA256:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA256;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA256_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA384:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA384;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA384_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA512:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_SHA512;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ SHA512_DIGEST_LENGTH_IN_BYTES;
+ break;
+ case HASH_KASUMI_F9:
+ sessionSetupData.hashSetupData.hashAlgorithm = CPA_CY_SYM_HASH_KASUMI_F9;
+ sessionSetupData.hashSetupData.digestResultLenInBytes =
+ KASUMI_DIGEST_LENGTH_IN_BYTES;
+ break;
+ default:
+ printf("Crypto: Undefined Hash specified\n");
+ break;
+ }
+ if (isHmac) {
+ sessionSetupData.hashSetupData.hashMode = hashMode;
+ sessionSetupData.symOperation = CPA_CY_SYM_OP_HASH;
+		/* If using an authenticated hash, set up the key lengths */
+ if (CPA_CY_SYM_HASH_MODE_AUTH == hashMode) {
+ /* Use a common max length key */
+ sessionSetupData.hashSetupData.authModeSetupData.authKey =
+ g_crypto_hash_keys.hash_key;
+ switch (pkt_hash_alg) {
+ case HASH_AES_XCBC:
+ case HASH_AES_XCBC_96:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ AES_XCBC_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_MD5:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA1_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA1:
+ case HASH_SHA1_96:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA1_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA224:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA224_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA256:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA256_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA384:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA384_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_SHA512:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ SHA512_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ case HASH_KASUMI_F9:
+ sessionSetupData.hashSetupData.authModeSetupData.authKeyLenInBytes =
+ KASUMI_AUTH_KEY_LENGTH_IN_BYTES;
+ break;
+ default:
+ printf("Crypto: Undefined Hash specified\n");
+ return CPA_STATUS_FAIL;
+ }
+ }
+ }
+
+ /* Only high priority supported */
+ sessionSetupData.sessionPriority = CPA_CY_PRIORITY_HIGH;
+
+ /* If chaining algorithms */
+ if (isCrypto && isHmac) {
+ sessionSetupData.symOperation = CPA_CY_SYM_OP_ALGORITHM_CHAINING;
+ /* @assumption Alg Chain order is cipher then hash for encrypt
+		 * and hash then cipher for decrypt */
+ if (CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT == crypto_direction) {
+ sessionSetupData.algChainOrder =
+ CPA_CY_SYM_ALG_CHAIN_ORDER_CIPHER_THEN_HASH;
+ } else {
+ sessionSetupData.algChainOrder =
+ CPA_CY_SYM_ALG_CHAIN_ORDER_HASH_THEN_CIPHER;
+ }
+ }
+ if (!isCrypto && !isHmac) {
+ *ppSessionCtx = NULL;
+ return CPA_STATUS_SUCCESS;
+ }
+
+ /* Set flags for digest operations */
+ sessionSetupData.digestIsAppended = CPA_FALSE;
+ sessionSetupData.verifyDigest = CPA_TRUE;
+
+ /* Get the session context size based on the crypto and/or hash operations*/
+ status = cpaCySymDpSessionCtxGetSize(cyInstanceHandle, &sessionSetupData,
+ &sessionCtxSizeInBytes);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: cpaCySymDpSessionCtxGetSize error, status: %"PRId32"\n",
+ status);
+ return CPA_STATUS_FAIL;
+ }
+
+ *ppSessionCtx = alloc_memzone_region(sessionCtxSizeInBytes, lcore_id);
+ if (NULL == *ppSessionCtx) {
+ printf("Crypto: Failed to allocate memory for Session Context\n");
+ return CPA_STATUS_FAIL;
+ }
+
+ status = cpaCySymDpInitSession(cyInstanceHandle, &sessionSetupData,
+ *ppSessionCtx);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: cpaCySymDpInitSession failed with status %"PRId32"\n", status);
+ return CPA_STATUS_FAIL;
+ }
+ return CPA_STATUS_SUCCESS;
+}
+
+static CpaStatus
+initSessionDataTables(struct qa_core_conf *qaCoreConf,uint32_t lcore_id)
+{
+ Cpa32U i = 0, j = 0;
+ CpaStatus status = CPA_STATUS_FAIL;
+ for (i = 0; i < NUM_CRYPTO; i++) {
+ for (j = 0; j < NUM_HMAC; j++) {
+ if (((i == CIPHER_KASUMI_F8) && (j != NO_HASH) && (j != HASH_KASUMI_F9)) ||
+ ((i != NO_CIPHER) && (i != CIPHER_KASUMI_F8) && (j == HASH_KASUMI_F9)))
+ continue;
+ status = initCySymSession(i, j, CPA_CY_SYM_HASH_MODE_AUTH,
+ CPA_CY_SYM_CIPHER_DIRECTION_ENCRYPT,
+ &qaCoreConf->encryptSessionHandleTbl[i][j],
+ qaCoreConf->instanceHandle,
+ lcore_id);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: Failed to initialize Encrypt sessions\n");
+ return CPA_STATUS_FAIL;
+ }
+ status = initCySymSession(i, j, CPA_CY_SYM_HASH_MODE_AUTH,
+ CPA_CY_SYM_CIPHER_DIRECTION_DECRYPT,
+ &qaCoreConf->decryptSessionHandleTbl[i][j],
+ qaCoreConf->instanceHandle,
+ lcore_id);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: Failed to initialize Decrypt sessions\n");
+ return CPA_STATUS_FAIL;
+ }
+ }
+ }
+ return CPA_STATUS_SUCCESS;
+}
+
+int
+crypto_init(void)
+{
+ if (CPA_STATUS_SUCCESS != icp_sal_userStartMultiProcess("SSL",CPA_FALSE)) {
+ printf("Crypto: Could not start sal for user space\n");
+ return CPA_STATUS_FAIL;
+ }
+ printf("Crypto: icp_sal_userStartMultiProcess(\"SSL\",CPA_FALSE)\n");
+ return 0;
+}
+
+/*
+ * Per core initialisation
+ */
+int
+per_core_crypto_init(uint32_t lcore_id)
+{
+ CpaStatus status = CPA_STATUS_FAIL;
+ char memzone_name[RTE_MEMZONE_NAMESIZE];
+
+ int socketID = rte_lcore_to_socket_id(lcore_id);
+
+ /* Allocate software ring for response messages. */
+
+ qaCoreConf[lcore_id].callbackQueue.head = 0;
+ qaCoreConf[lcore_id].callbackQueue.tail = 0;
+ qaCoreConf[lcore_id].callbackQueue.numEntries = 0;
+ qaCoreConf[lcore_id].kickFreq = 0;
+ qaCoreConf[lcore_id].qaOutstandingRequests = 0;
+ qaCoreConf[lcore_id].numResponseAttempts = 0;
+
+ /* Initialise and reserve lcore memzone for virt2phys translation */
+ snprintf(memzone_name,
+ RTE_MEMZONE_NAMESIZE,
+ "lcore_%u",
+ lcore_id);
+
+ qaCoreConf[lcore_id].lcoreMemzone.memzone = rte_memzone_reserve(
+ memzone_name,
+ LCORE_MEMZONE_SIZE,
+ socketID,
+ 0);
+ if (NULL == qaCoreConf[lcore_id].lcoreMemzone.memzone) {
+ printf("Crypto: Error allocating memzone on lcore %u\n",lcore_id);
+ return -1;
+ }
+ qaCoreConf[lcore_id].lcoreMemzone.next_free_address =
+ qaCoreConf[lcore_id].lcoreMemzone.memzone->addr;
+
+ qaCoreConf[lcore_id].pPacketIV = alloc_memzone_region(IV_LENGTH_16_BYTES,
+ lcore_id);
+
+ if (NULL == qaCoreConf[lcore_id].pPacketIV ) {
+ printf("Crypto: Failed to allocate memory for Initialization Vector\n");
+ return -1;
+ }
+
+ memcpy(qaCoreConf[lcore_id].pPacketIV, &g_crypto_hash_keys.iv,
+ IV_LENGTH_16_BYTES);
+
+ qaCoreConf[lcore_id].packetIVPhy = qa_v2p(qaCoreConf[lcore_id].pPacketIV);
+ if (0 == qaCoreConf[lcore_id].packetIVPhy) {
+ printf("Crypto: Invalid physical address for Initialization Vector\n");
+ return -1;
+ }
+
+ /*
+ * Obtain the instance handle that is mapped to the current lcore.
+ * This can fail if an instance is not mapped to a bank which has been
+ * affinitized to the current lcore.
+ */
+ status = get_crypto_instance_on_core(&(qaCoreConf[lcore_id].instanceHandle),
+ lcore_id);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: get_crypto_instance_on_core failed with status: %"PRId32"\n",
+ status);
+ return -1;
+ }
+
+ status = cpaCySymDpRegCbFunc(qaCoreConf[lcore_id].instanceHandle,
+ (CpaCySymDpCbFunc) qa_crypto_callback);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: cpaCySymDpRegCbFunc failed with status: %"PRId32"\n", status);
+ return -1;
+ }
+
+ /*
+	 * Set the address translation callback for virtual to physical address
+ * mapping. This will be called by the QAT driver during initialisation only.
+ */
+ status = cpaCySetAddressTranslation(qaCoreConf[lcore_id].instanceHandle,
+ (CpaVirtualToPhysical) qa_v2p);
+ if (CPA_STATUS_SUCCESS != status) {
+ printf("Crypto: cpaCySetAddressTranslation failed with status: %"PRId32"\n",
+ status);
+ return -1;
+ }
+
+ status = initSessionDataTables(&qaCoreConf[lcore_id],lcore_id);
+ if (CPA_STATUS_SUCCESS != status) {
+		printf("Crypto: Failed to allocate all session tables.\n");
+ return -1;
+ }
+ return 0;
+}
+
+static CpaStatus
+enqueueOp(CpaCySymDpOpData *opData, uint32_t lcore_id)
+{
+
+ CpaStatus status;
+
+ /*
+ * Assumption is there is no requirement to do load balancing between
+ * acceleration units - that is one acceleration unit is tied to a core.
+ */
+ opData->instanceHandle = qaCoreConf[lcore_id].instanceHandle;
+
+ if ((++qaCoreConf[lcore_id].kickFreq) % CRYPTO_BURST_TX == 0) {
+ status = cpaCySymDpEnqueueOp(opData, CPA_TRUE);
+ } else {
+ status = cpaCySymDpEnqueueOp(opData, CPA_FALSE);
+ }
+
+ qaCoreConf[lcore_id].qaOutstandingRequests++;
+
+ return status;
+}
+
+void
+crypto_flush_tx_queue(uint32_t lcore_id)
+{
+
+ cpaCySymDpPerformOpNow(qaCoreConf[lcore_id].instanceHandle);
+}
+
+enum crypto_result
+crypto_encrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h)
+{
+ CpaCySymDpOpData *opData =
+ rte_pktmbuf_mtod_offset(rte_buff, CpaCySymDpOpData *,
+ CRYPTO_OFFSET_TO_OPDATA);
+ uint32_t lcore_id;
+
+ if (unlikely(c >= NUM_CRYPTO || h >= NUM_HMAC))
+ return CRYPTO_RESULT_FAIL;
+
+ lcore_id = rte_lcore_id();
+
+ memset(opData, 0, sizeof(CpaCySymDpOpData));
+
+ opData->srcBuffer = opData->dstBuffer = PACKET_DATA_START_PHYS(rte_buff);
+ opData->srcBufferLen = opData->dstBufferLen = rte_buff->data_len;
+ opData->sessionCtx = qaCoreConf[lcore_id].encryptSessionHandleTbl[c][h];
+ opData->thisPhys = PACKET_DATA_START_PHYS(rte_buff)
+ + CRYPTO_OFFSET_TO_OPDATA;
+ opData->pCallbackTag = rte_buff;
+
+ /* if no crypto or hash operations are specified return fail */
+ if (NO_CIPHER == c && NO_HASH == h)
+ return CRYPTO_RESULT_FAIL;
+
+ if (NO_CIPHER != c) {
+ opData->pIv = qaCoreConf[lcore_id].pPacketIV;
+ opData->iv = qaCoreConf[lcore_id].packetIVPhy;
+
+ if (CIPHER_AES_CBC_128 == c)
+ opData->ivLenInBytes = IV_LENGTH_16_BYTES;
+ else
+ opData->ivLenInBytes = IV_LENGTH_8_BYTES;
+
+ opData->cryptoStartSrcOffsetInBytes = CRYPTO_START_OFFSET;
+ opData->messageLenToCipherInBytes = rte_buff->data_len
+ - CRYPTO_START_OFFSET;
+ /*
+ * Work around for padding, message length has to be a multiple of
+ * block size.
+ */
+ opData->messageLenToCipherInBytes -= opData->messageLenToCipherInBytes
+ % CIPHER_BLOCK_DEFAULT_SIZE;
+ }
+
+ if (NO_HASH != h) {
+
+ opData->hashStartSrcOffsetInBytes = HASH_START_OFFSET;
+ opData->messageLenToHashInBytes = rte_buff->data_len
+ - HASH_START_OFFSET;
+ /*
+ * Work around for padding, message length has to be a multiple of block
+ * size.
+ */
+ opData->messageLenToHashInBytes -= opData->messageLenToHashInBytes
+ % HASH_BLOCK_DEFAULT_SIZE;
+
+ /*
+		 * Assumption: it is OK to ignore the passed digest pointer and place
+		 * the HMAC at the end of the packet.
+ */
+ opData->digestResult = rte_buff->buf_physaddr + rte_buff->data_len;
+ }
+
+ if (CPA_STATUS_SUCCESS != enqueueOp(opData, lcore_id)) {
+ /*
+ * Failed to place a packet on the hardware queue.
+ * Most likely because the QA hardware is busy.
+ */
+ return CRYPTO_RESULT_FAIL;
+ }
+ return CRYPTO_RESULT_IN_PROGRESS;
+}
+
+enum crypto_result
+crypto_decrypt(struct rte_mbuf *rte_buff, enum cipher_alg c, enum hash_alg h)
+{
+
+ CpaCySymDpOpData *opData = rte_pktmbuf_mtod_offset(rte_buff, void *,
+ CRYPTO_OFFSET_TO_OPDATA);
+ uint32_t lcore_id;
+
+ if (unlikely(c >= NUM_CRYPTO || h >= NUM_HMAC))
+ return CRYPTO_RESULT_FAIL;
+
+ lcore_id = rte_lcore_id();
+
+ memset(opData, 0, sizeof(CpaCySymDpOpData));
+
+ opData->dstBuffer = opData->srcBuffer = PACKET_DATA_START_PHYS(rte_buff);
+ opData->dstBufferLen = opData->srcBufferLen = rte_buff->data_len;
+ opData->thisPhys = PACKET_DATA_START_PHYS(rte_buff)
+ + CRYPTO_OFFSET_TO_OPDATA;
+ opData->sessionCtx = qaCoreConf[lcore_id].decryptSessionHandleTbl[c][h];
+ opData->pCallbackTag = rte_buff;
+
+ /* if no crypto or hmac operations are specified return fail */
+ if (NO_CIPHER == c && NO_HASH == h)
+ return CRYPTO_RESULT_FAIL;
+
+ if (NO_CIPHER != c) {
+ opData->pIv = qaCoreConf[lcore_id].pPacketIV;
+ opData->iv = qaCoreConf[lcore_id].packetIVPhy;
+
+ if (CIPHER_AES_CBC_128 == c)
+ opData->ivLenInBytes = IV_LENGTH_16_BYTES;
+ else
+ opData->ivLenInBytes = IV_LENGTH_8_BYTES;
+
+ opData->cryptoStartSrcOffsetInBytes = CRYPTO_START_OFFSET;
+ opData->messageLenToCipherInBytes = rte_buff->data_len
+ - CRYPTO_START_OFFSET;
+
+ /*
+ * Work around for padding, message length has to be a multiple of block
+ * size.
+ */
+ opData->messageLenToCipherInBytes -= opData->messageLenToCipherInBytes
+ % CIPHER_BLOCK_DEFAULT_SIZE;
+ }
+ if (NO_HASH != h) {
+ opData->hashStartSrcOffsetInBytes = HASH_START_OFFSET;
+ opData->messageLenToHashInBytes = rte_buff->data_len
+ - HASH_START_OFFSET;
+ /*
+ * Work around for padding, message length has to be a multiple of block
+ * size.
+ */
+ opData->messageLenToHashInBytes -= opData->messageLenToHashInBytes
+ % HASH_BLOCK_DEFAULT_SIZE;
+ opData->digestResult = rte_buff->buf_physaddr + rte_buff->data_len;
+ }
+
+ if (CPA_STATUS_SUCCESS != enqueueOp(opData, lcore_id)) {
+ /*
+ * Failed to place a packet on the hardware queue.
+ * Most likely because the QA hardware is busy.
+ */
+ return CRYPTO_RESULT_FAIL;
+ }
+ return CRYPTO_RESULT_IN_PROGRESS;
+}
+
+void *
+crypto_get_next_response(void)
+{
+ uint32_t lcore_id;
+ lcore_id = rte_lcore_id();
+ struct qa_callbackQueue *callbackQ = &(qaCoreConf[lcore_id].callbackQueue);
+ void *entry = NULL;
+
+ if (callbackQ->numEntries) {
+ entry = callbackQ->qaCallbackRing[callbackQ->tail];
+ callbackQ->tail++;
+ callbackQ->numEntries--;
+ }
+
+ /* If there are no outstanding requests no need to poll, return entry */
+ if (qaCoreConf[lcore_id].qaOutstandingRequests == 0)
+ return entry;
+
+ if (callbackQ->numEntries < CRYPTO_QUEUED_RESP_POLL_THRESHOLD
+ && qaCoreConf[lcore_id].numResponseAttempts++
+ % GET_NEXT_RESPONSE_FREQ == 0) {
+ /*
+ * Only poll the hardware when there is less than
+ * CRYPTO_QUEUED_RESP_POLL_THRESHOLD elements in the software queue
+ */
+ icp_sal_CyPollDpInstance(qaCoreConf[lcore_id].instanceHandle,
+ CRYPTO_MAX_RESPONSE_QUOTA);
+ }
+ return entry;
+}
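One detail worth calling out in the callback-queue code above: the ring never checks its bounds because head and tail are uint8_t and the ring holds exactly 256 entries, so ordinary unsigned arithmetic supplies the wrap-around. A tiny, self-contained sketch of that property (not part of the example itself):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint8_t head = 255;

	head++;			/* 8-bit unsigned arithmetic wraps: (255 + 1) % 256 */
	assert(head == 0);	/* so indexing a 256-entry ring never overruns it */
	return 0;
}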
diff --git a/examples/dpdk_qat/crypto.h b/examples/dpdk_qat/crypto.h
new file mode 100644
index 00000000..f68b0b65
--- /dev/null
+++ b/examples/dpdk_qat/crypto.h
@@ -0,0 +1,90 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CRYPTO_H_
+#define CRYPTO_H_
+
+/* Pass Labels/Values to crypto units */
+enum cipher_alg {
+ /* Option to not do any cryptography */
+ NO_CIPHER,
+ CIPHER_DES,
+ CIPHER_DES_CBC,
+ CIPHER_DES3,
+ CIPHER_DES3_CBC,
+ CIPHER_AES,
+ CIPHER_AES_CBC_128,
+ CIPHER_KASUMI_F8,
+ NUM_CRYPTO,
+};
+
+enum hash_alg {
+ /* Option to not do any hash */
+ NO_HASH,
+ HASH_MD5,
+ HASH_SHA1,
+ HASH_SHA1_96,
+ HASH_SHA224,
+ HASH_SHA256,
+ HASH_SHA384,
+ HASH_SHA512,
+ HASH_AES_XCBC,
+ HASH_AES_XCBC_96,
+ HASH_KASUMI_F9,
+ NUM_HMAC,
+};
+
+/* Return value from crypto_{encrypt/decrypt} */
+enum crypto_result {
+ /* Packet was successfully put into crypto queue */
+ CRYPTO_RESULT_IN_PROGRESS,
+ /* Cryptography has failed in some way */
+ CRYPTO_RESULT_FAIL,
+};
+
+extern enum crypto_result crypto_encrypt(struct rte_mbuf *pkt, enum cipher_alg c,
+ enum hash_alg h);
+extern enum crypto_result crypto_decrypt(struct rte_mbuf *pkt, enum cipher_alg c,
+ enum hash_alg h);
+
+extern int crypto_init(void);
+
+extern int per_core_crypto_init(uint32_t lcore_id);
+
+extern void crypto_exit(void);
+
+extern void *crypto_get_next_response(void);
+
+extern void crypto_flush_tx_queue(uint32_t lcore_id);
+
+#endif /* CRYPTO_H_ */
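Taken together, the declarations above imply a call order which main.c (next) follows: crypto_init() once to start the SAL, per_core_crypto_init() on every participating lcore, then a polling loop that drains crypto_get_next_response(), submits new work with crypto_encrypt()/crypto_decrypt(), and periodically kicks the hardware ring with crypto_flush_tx_queue(). A hedged skeleton of that loop, with the NIC RX/TX plumbing left out:

#include <stdint.h>
#include <rte_lcore.h>
#include <rte_mbuf.h>
#include "crypto.h"

/* Sketch only: the real packet source/sink is the NIC code in main.c. */
static int lcore_crypto_skeleton(void)
{
	uint32_t lcore_id = rte_lcore_id();

	/* crypto_init() is assumed to have been called once already */
	if (per_core_crypto_init(lcore_id) != 0)
		return -1;	/* e.g. no instance affinitized to this lcore */

	for (;;) {
		/* 1. drain any finished encrypt/decrypt requests */
		struct rte_mbuf *done = crypto_get_next_response();
		if (done != NULL) {
			/* forward the completed packet here (NIC TX in main.c) */
			continue;
		}

		/* 2. pkt would normally come from NIC RX, not be NULL */
		struct rte_mbuf *pkt = NULL /* ... */;
		if (pkt != NULL &&
		    CRYPTO_RESULT_FAIL ==
		    crypto_encrypt(pkt, CIPHER_AES_CBC_128, HASH_SHA1))
			rte_pktmbuf_free(pkt);

		/* 3. kick queued requests to the hardware ring periodically */
		crypto_flush_tx_queue(lcore_id);
	}
}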
diff --git a/examples/dpdk_qat/main.c b/examples/dpdk_qat/main.c
new file mode 100644
index 00000000..dc68989a
--- /dev/null
+++ b/examples/dpdk_qat/main.c
@@ -0,0 +1,824 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_string_fns.h>
+
+#include "crypto.h"
+
+#define NB_MBUF (32 * 1024)
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define TX_QUEUE_FLUSH_MASK 0xFFFFFFFF
+#define TSC_COUNT_LIMIT 1000
+
+#define ACTION_ENCRYPT 1
+#define ACTION_DECRYPT 2
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static unsigned enabled_port_mask = 0;
+static int promiscuous_on = 1; /**< Ports are set to promiscuous mode by default. */
+
+/* list of enabled ports */
+static uint32_t dst_ports[RTE_MAX_ETHPORTS];
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+
+#define MAX_LCORE_PARAMS 1024
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+};
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+static struct lcore_params lcore_params_array_default[] = {
+ {0, 0, 2},
+ {0, 1, 2},
+ {0, 2, 2},
+ {1, 0, 2},
+ {1, 1, 2},
+ {1, 2, 2},
+ {2, 0, 2},
+ {3, 0, 3},
+ {3, 1, 3},
+};
+
+static struct lcore_params * lcore_params = lcore_params_array_default;
+static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
+ sizeof(lcore_params_array_default[0]);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+		.hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool * pktmbuf_pool[RTE_MAX_NUMA_NODES];
+
+struct lcore_conf {
+ uint64_t tsc;
+ uint64_t tsc_count;
+ uint32_t tx_mask;
+ uint16_t n_rx_queue;
+ uint16_t rx_queue_list_pos;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct mbuf_table rx_mbuf;
+ uint32_t rx_mbuf_pos;
+ uint32_t rx_curr_queue;
+ struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+static inline struct rte_mbuf *
+nic_rx_get_packet(struct lcore_conf *qconf)
+{
+ struct rte_mbuf *pkt;
+
+ if (unlikely(qconf->n_rx_queue == 0))
+ return NULL;
+
+ /* Look for the next queue with packets; return if none */
+ if (unlikely(qconf->rx_mbuf_pos == qconf->rx_mbuf.len)) {
+ uint32_t i;
+
+ qconf->rx_mbuf_pos = 0;
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+ qconf->rx_mbuf.len = rte_eth_rx_burst(
+ qconf->rx_queue_list[qconf->rx_curr_queue].port_id,
+ qconf->rx_queue_list[qconf->rx_curr_queue].queue_id,
+ qconf->rx_mbuf.m_table, MAX_PKT_BURST);
+
+ qconf->rx_curr_queue++;
+ if (unlikely(qconf->rx_curr_queue == qconf->n_rx_queue))
+ qconf->rx_curr_queue = 0;
+ if (likely(qconf->rx_mbuf.len > 0))
+ break;
+ }
+ if (unlikely(i == qconf->n_rx_queue))
+ return NULL;
+ }
+
+ /* Get the next packet from the current queue; if last packet, go to next queue */
+ pkt = qconf->rx_mbuf.m_table[qconf->rx_mbuf_pos];
+ qconf->rx_mbuf_pos++;
+
+ return pkt;
+}
+
+static inline void
+nic_tx_flush_queues(struct lcore_conf *qconf)
+{
+ uint8_t portid;
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ struct rte_mbuf **m_table = NULL;
+ uint16_t queueid, len;
+ uint32_t n, i;
+
+ if (likely((qconf->tx_mask & (1 << portid)) == 0))
+ continue;
+
+ len = qconf->tx_mbufs[portid].len;
+ if (likely(len == 0))
+ continue;
+
+ queueid = qconf->tx_queue_id[portid];
+ m_table = qconf->tx_mbufs[portid].m_table;
+
+ n = rte_eth_tx_burst(portid, queueid, m_table, len);
+ for (i = n; i < len; i++){
+ rte_pktmbuf_free(m_table[i]);
+ }
+
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ qconf->tx_mask = TX_QUEUE_FLUSH_MASK;
+}
+
+static inline void
+nic_tx_send_packet(struct rte_mbuf *pkt, uint8_t port)
+{
+ struct lcore_conf *qconf;
+ uint32_t lcoreid;
+ uint16_t len;
+
+ if (unlikely(pkt == NULL)) {
+ return;
+ }
+
+ lcoreid = rte_lcore_id();
+ qconf = &lcore_conf[lcoreid];
+
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = pkt;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ uint32_t n, i;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id[port];
+ n = rte_eth_tx_burst(port, queueid, qconf->tx_mbufs[port].m_table, MAX_PKT_BURST);
+ for (i = n; i < MAX_PKT_BURST; i++){
+ rte_pktmbuf_free(qconf->tx_mbufs[port].m_table[i]);
+ }
+
+ qconf->tx_mask &= ~(1 << port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+}
+
+/* main processing loop */
+static __attribute__((noreturn)) int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ uint32_t lcoreid;
+ struct lcore_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ lcoreid = rte_lcore_id();
+ qconf = &lcore_conf[lcoreid];
+
+ printf("Thread %u starting...\n", lcoreid);
+
+ for (;;) {
+ struct rte_mbuf *pkt;
+ uint32_t pkt_from_nic_rx = 0;
+ uint8_t port;
+
+ /* Flush TX queues */
+ qconf->tsc_count++;
+ if (unlikely(qconf->tsc_count == TSC_COUNT_LIMIT)) {
+ uint64_t tsc, diff_tsc;
+
+ tsc = rte_rdtsc();
+
+ diff_tsc = tsc - qconf->tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+ nic_tx_flush_queues(qconf);
+ crypto_flush_tx_queue(lcoreid);
+ qconf->tsc = tsc;
+ }
+
+ qconf->tsc_count = 0;
+ }
+
+		/*
+		 * Check the Intel QuickAssist queues first.
+		 */
+ pkt = (struct rte_mbuf *) crypto_get_next_response();
+ if (pkt == NULL) {
+ pkt = nic_rx_get_packet(qconf);
+ pkt_from_nic_rx = 1;
+ }
+ if (pkt == NULL)
+ continue;
+ /* Send packet to either QAT encrypt, QAT decrypt or NIC TX */
+ if (pkt_from_nic_rx) {
+ struct ipv4_hdr *ip = rte_pktmbuf_mtod_offset(pkt,
+ struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ if (ip->src_addr & rte_cpu_to_be_32(ACTION_ENCRYPT)) {
+ if (CRYPTO_RESULT_FAIL == crypto_encrypt(pkt,
+ (enum cipher_alg)((ip->src_addr >> 16) & 0xFF),
+ (enum hash_alg)((ip->src_addr >> 8) & 0xFF)))
+ rte_pktmbuf_free(pkt);
+ continue;
+ }
+
+ if (ip->src_addr & rte_cpu_to_be_32(ACTION_DECRYPT)) {
+ if(CRYPTO_RESULT_FAIL == crypto_decrypt(pkt,
+ (enum cipher_alg)((ip->src_addr >> 16) & 0xFF),
+ (enum hash_alg)((ip->src_addr >> 8) & 0xFF)))
+ rte_pktmbuf_free(pkt);
+ continue;
+ }
+ }
+
+ port = dst_ports[pkt->port];
+
+ /* Transmit the packet */
+ nic_tx_send_packet(pkt, (uint8_t)port);
+ }
+}
+
+static inline unsigned
+get_port_max_rx_queues(uint8_t port_id)
+{
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+ return dev_info.max_rx_queues;
+}
+
+static inline unsigned
+get_port_max_tx_queues(uint8_t port_id)
+{
+ struct rte_eth_dev_info dev_info;
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+ return dev_info.max_tx_queues;
+}
+
+static int
+check_lcore_params(void)
+{
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].queue_id >= get_port_max_rx_queues(lcore_params[i].port_id)) {
+ printf("invalid queue number: %hhu\n", lcore_params[i].queue_id);
+ return -1;
+ }
+ if (!rte_lcore_is_enabled(lcore_params[i].lcore_id)) {
+ printf("error: lcore %hhu is not enabled in lcore mask\n",
+ lcore_params[i].lcore_id);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ portid = lcore_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
+ queue = lcore_params[i].queue_id;
+ }
+ return (uint8_t)(++queue);
+}
+
+static int
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)lcore);
+ return -1;
+ }
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].n_rx_queue++;
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf ("%s [EAL options] -- -p PORTMASK [--no-promisc]"
+		" [--config '(port,queue,lcore)[,(port,queue,lcore)]']\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " --no-promisc: disable promiscuous mode (default is ON)\n"
+ " --config '(port,queue,lcore)': rx queues configuration\n",
+ prgname);
+}
+
+static unsigned
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+
+ return pm;
+}
+
+static int
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p_end = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p_end,'(')) != NULL) {
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ ++p;
+ if((p_end = strchr(p,')')) == NULL)
+ return -1;
+
+ size = p_end - p;
+ if(size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"config", 1, 0, 0},
+ {"no-promisc", 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ if (strcmp(lgopts[option_index].name, "config") == 0) {
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("invalid config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+ if (strcmp(lgopts[option_index].name, "no-promisc") == 0) {
+ printf("Promiscuous mode disabled\n");
+ promiscuous_on = 0;
+ }
+ break;
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (enabled_port_mask == 0) {
+ printf("portmask not specified\n");
+ print_usage(prgname);
+ return -1;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
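+/*
+ * Create one mbuf pool per NUMA socket that hosts at least one enabled
+ * lcore; all lcores on the same socket share the pool.
+ */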
+static int
+init_mem(void)
+{
+ int socketid;
+ unsigned lcoreid;
+ char s[64];
+
+ RTE_LCORE_FOREACH(lcoreid) {
+ socketid = rte_lcore_to_socket_id(lcoreid);
+ if (socketid >= RTE_MAX_NUMA_NODES) {
+ printf("Socket %d of lcore %u is out of range %d\n",
+ socketid, lcoreid, RTE_MAX_NUMA_NODES);
+ return -1;
+ }
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] =
+ rte_pktmbuf_pool_create(s, NB_MBUF, 32, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+ if (pktmbuf_pool[socketid] == NULL) {
+ printf("Cannot init mbuf pool on socket %d\n", socketid);
+ return -1;
+ }
+ printf("Allocated mbuf pool on socket %d\n", socketid);
+ }
+ }
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_conf *qconf;
+ struct rte_eth_link link;
+ int ret;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcoreid;
+ uint32_t nb_tx_queue;
+ uint8_t portid, nb_rx_queue, queue, socketid, last_port;
+ unsigned nb_ports_in_mask = 0;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ return -1;
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ if (check_lcore_params() < 0)
+ rte_panic("check_lcore_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ return -1;
+
+ ret = init_mem();
+ if (ret < 0)
+ return -1;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_panic("check_port_config failed\n");
+
+ /* reset dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ dst_ports[portid] = 0;
+ last_port = 0;
+
+ /*
+ * Pair up the enabled ports: each port in a pair becomes the
+ * destination of the other. With an odd number of ports, the last
+ * port is paired with itself.
+ */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ if (nb_ports_in_mask % 2) {
+ dst_ports[portid] = last_port;
+ dst_ports[last_port] = portid;
+ }
+ else
+ last_port = portid;
+
+ nb_ports_in_mask++;
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ dst_ports[last_port] = last_port;
+ }
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid );
+ fflush(stdout);
+
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ if (nb_rx_queue > get_port_max_rx_queues(portid))
+ rte_panic("Number of rx queues %d exceeds max number of rx queues %u"
+ " for port %d\n", nb_rx_queue, get_port_max_rx_queues(portid),
+ portid);
+ nb_tx_queue = rte_lcore_count();
+ if (nb_tx_queue > get_port_max_tx_queues(portid))
+ rte_panic("Number of lcores %u exceeds max number of tx queues %u"
+ " for port %d\n", nb_tx_queue, get_port_max_tx_queues(portid),
+ portid);
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)nb_tx_queue );
+ ret = rte_eth_dev_configure(portid, nb_rx_queue,
+ (uint16_t)nb_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_panic("Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+
+ /* init one TX queue per couple (lcore,port) */
+ queueid = 0;
+ RTE_LCORE_FOREACH(lcoreid) {
+ socketid = (uint8_t)rte_lcore_to_socket_id(lcoreid);
+ printf("txq=%u,%d,%d ", lcoreid, queueid, socketid);
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socketid,
+ NULL);
+ if (ret < 0)
+ rte_panic("rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_conf[lcoreid];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+ }
+ printf("\n");
+ }
+
+ RTE_LCORE_FOREACH(lcoreid) {
+ qconf = &lcore_conf[lcoreid];
+ printf("\nInitializing rx queues on lcore %u ... ", lcoreid );
+ fflush(stdout);
+ /* init RX queues */
+ for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+ socketid = (uint8_t)rte_lcore_to_socket_id(lcoreid);
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid,
+ NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_panic("rte_eth_rx_queue_setup: err=%d,"
+ "port=%d\n", ret, portid);
+ }
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_panic("rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ printf("done: Port %d ", portid);
+
+ /* get link status */
+ rte_eth_link_get(portid, &link);
+ if (link.link_status)
+ printf(" Link Up - speed %u Mbps - %s\n",
+ (unsigned) link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf(" Link Down\n");
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ }
+ printf("Crypto: Initializing Crypto...\n");
+ if (crypto_init() != 0)
+ return -1;
+
+ RTE_LCORE_FOREACH(lcoreid) {
+ if (per_core_crypto_init(lcoreid) != 0) {
+ printf("Crypto: Cannot init lcore crypto on lcore %u\n", (unsigned)lcoreid);
+ return -1;
+ }
+ }
+ printf("Crypto: Initialization complete\n");
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcoreid) {
+ if (rte_eal_wait_lcore(lcoreid) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ethtool/Makefile b/examples/ethtool/Makefile
new file mode 100644
index 00000000..995cd25b
--- /dev/null
+++ b/examples/ethtool/Makefile
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overwritten by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+else
+
+DIRS-y += lib ethtool-app
+endif
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/ethtool/ethtool-app/Makefile b/examples/ethtool/ethtool-app/Makefile
new file mode 100644
index 00000000..09c66ad1
--- /dev/null
+++ b/examples/ethtool/ethtool-app/Makefile
@@ -0,0 +1,54 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ethtool
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c ethapp.c
+
+CFLAGS += -O3 -D_GNU_SOURCE -pthread -I$(SRCDIR)/../lib
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -L$(subst ethtool-app,lib,$(RTE_OUTPUT))/lib
+LDLIBS += -lrte_ethtool
+
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ethtool/ethtool-app/ethapp.c b/examples/ethtool/ethtool-app/ethapp.c
new file mode 100644
index 00000000..2ed4796d
--- /dev/null
+++ b/examples/ethtool/ethtool-app/ethapp.c
@@ -0,0 +1,873 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_etheraddr.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "rte_ethtool.h"
+#include "ethapp.h"
+
+#define EEPROM_DUMP_CHUNKSIZE 1024
+
+
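+/* Argument layouts for the interactive commands defined below. */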
+struct pcmd_get_params {
+ cmdline_fixed_string_t cmd;
+};
+struct pcmd_int_params {
+ cmdline_fixed_string_t cmd;
+ uint16_t port;
+};
+struct pcmd_intstr_params {
+ cmdline_fixed_string_t cmd;
+ uint16_t port;
+ cmdline_fixed_string_t opt;
+};
+struct pcmd_intmac_params {
+ cmdline_fixed_string_t cmd;
+ uint16_t port;
+ struct ether_addr mac;
+};
+struct pcmd_str_params {
+ cmdline_fixed_string_t cmd;
+ cmdline_fixed_string_t opt;
+};
+struct pcmd_vlan_params {
+ cmdline_fixed_string_t cmd;
+ uint16_t port;
+ cmdline_fixed_string_t mode;
+ uint16_t vid;
+};
+struct pcmd_intintint_params {
+ cmdline_fixed_string_t cmd;
+ uint16_t port;
+ uint16_t tx;
+ uint16_t rx;
+};
+
+
+/* Parameter-less commands */
+cmdline_parse_token_string_t pcmd_quit_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "quit");
+cmdline_parse_token_string_t pcmd_stats_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "stats");
+cmdline_parse_token_string_t pcmd_drvinfo_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "drvinfo");
+cmdline_parse_token_string_t pcmd_link_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_get_params, cmd, "link");
+
+/* Commands taking just port id */
+cmdline_parse_token_string_t pcmd_open_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "open");
+cmdline_parse_token_string_t pcmd_stop_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "stop");
+cmdline_parse_token_string_t pcmd_rxmode_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "rxmode");
+cmdline_parse_token_string_t pcmd_portstats_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_int_params, cmd, "portstats");
+cmdline_parse_token_num_t pcmd_int_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_int_params, port, UINT16);
+
+/* Commands taking port id and string */
+cmdline_parse_token_string_t pcmd_eeprom_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "eeprom");
+cmdline_parse_token_string_t pcmd_mtu_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "mtu");
+cmdline_parse_token_string_t pcmd_regs_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "regs");
+
+cmdline_parse_token_num_t pcmd_intstr_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intstr_params, port, UINT16);
+cmdline_parse_token_string_t pcmd_intstr_token_opt =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, opt, NULL);
+
+/* Commands taking port id and a MAC address string */
+cmdline_parse_token_string_t pcmd_macaddr_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intmac_params, cmd, "macaddr");
+cmdline_parse_token_num_t pcmd_intmac_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intmac_params, port, UINT16);
+cmdline_parse_token_etheraddr_t pcmd_intmac_token_mac =
+ TOKEN_ETHERADDR_INITIALIZER(struct pcmd_intmac_params, mac);
+
+/* Command taking just a MAC address */
+cmdline_parse_token_string_t pcmd_validate_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intmac_params, cmd, "validate");
+
+
+/* Commands taking port id and two integers */
+cmdline_parse_token_string_t pcmd_ringparam_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intintint_params, cmd,
+ "ringparam");
+cmdline_parse_token_num_t pcmd_intintint_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, port, UINT16);
+cmdline_parse_token_num_t pcmd_intintint_token_tx =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, tx, UINT16);
+cmdline_parse_token_num_t pcmd_intintint_token_rx =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intintint_params, rx, UINT16);
+
+
+/* Pause commands */
+cmdline_parse_token_string_t pcmd_pause_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params, cmd, "pause");
+cmdline_parse_token_num_t pcmd_pause_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_intstr_params, port, UINT16);
+cmdline_parse_token_string_t pcmd_pause_token_opt =
+ TOKEN_STRING_INITIALIZER(struct pcmd_intstr_params,
+ opt, "all#tx#rx#none");
+
+/* VLAN commands */
+cmdline_parse_token_string_t pcmd_vlan_token_cmd =
+ TOKEN_STRING_INITIALIZER(struct pcmd_vlan_params, cmd, "vlan");
+cmdline_parse_token_num_t pcmd_vlan_token_port =
+ TOKEN_NUM_INITIALIZER(struct pcmd_vlan_params, port, UINT16);
+cmdline_parse_token_string_t pcmd_vlan_token_mode =
+ TOKEN_STRING_INITIALIZER(struct pcmd_vlan_params, mode, "add#del");
+cmdline_parse_token_num_t pcmd_vlan_token_vid =
+ TOKEN_NUM_INITIALIZER(struct pcmd_vlan_params, vid, UINT16);
+
+
+static void
+pcmd_quit_callback(__rte_unused void *ptr_params,
+ struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ cmdline_quit(ctx);
+}
+
+
+static void
+pcmd_drvinfo_callback(__rte_unused void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct ethtool_drvinfo info;
+ int id_port;
+
+ for (id_port = 0; id_port < rte_eth_dev_count(); id_port++) {
+ if (rte_ethtool_get_drvinfo(id_port, &info)) {
+ printf("Error getting info for port %i\n", id_port);
+ return;
+ }
+ printf("Port %i driver: %s (ver: %s)\n",
+ id_port, info.driver, info.version
+ );
+ }
+}
+
+
+static void
+pcmd_link_callback(__rte_unused void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ int num_ports = rte_eth_dev_count();
+ int id_port, stat_port;
+
+ for (id_port = 0; id_port < num_ports; id_port++) {
+ if (!rte_eth_dev_is_valid_port(id_port))
+ continue;
+ stat_port = rte_ethtool_get_link(id_port);
+ switch (stat_port) {
+ case 0:
+ printf("Port %i: Down\n", id_port);
+ break;
+ case 1:
+ printf("Port %i: Up\n", id_port);
+ break;
+ default:
+ printf("Port %i: Error getting link status\n",
+ id_port
+ );
+ break;
+ }
+ }
+ printf("\n");
+}
+
+
+static void
+pcmd_regs_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_intstr_params *params = ptr_params;
+ int len_regs;
+ struct ethtool_regs regs;
+ unsigned char *buf_data;
+ FILE *fp_regs;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ len_regs = rte_ethtool_get_regs_len(params->port);
+ if (len_regs > 0) {
+ printf("Port %i: %i bytes\n", params->port, len_regs);
+ buf_data = malloc(len_regs);
+ if (buf_data == NULL) {
+ printf("Error allocating %i bytes for buffer\n",
+ len_regs);
+ return;
+ }
+ if (!rte_ethtool_get_regs(params->port, &regs, buf_data)) {
+ fp_regs = fopen(params->opt, "wb");
+ if (fp_regs == NULL) {
+ printf("Error opening '%s' for writing\n",
+ params->opt);
+ } else {
+ if ((int)fwrite(buf_data,
+ 1, len_regs,
+ fp_regs) != len_regs)
+ printf("Error writing '%s'\n",
+ params->opt);
+ fclose(fp_regs);
+ }
+ }
+ free(buf_data);
+ } else if (len_regs == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error getting registers\n", params->port);
+}
+
+
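+/* Dump the port EEPROM to the named file in EEPROM_DUMP_CHUNKSIZE-byte chunks. */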
+static void
+pcmd_eeprom_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_intstr_params *params = ptr_params;
+ struct ethtool_eeprom info_eeprom;
+ int len_eeprom;
+ int pos_eeprom;
+ int stat;
+ unsigned char bytes_eeprom[EEPROM_DUMP_CHUNKSIZE];
+ FILE *fp_eeprom;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ len_eeprom = rte_ethtool_get_eeprom_len(params->port);
+ if (len_eeprom > 0) {
+ fp_eeprom = fopen(params->opt, "wb");
+ if (fp_eeprom == NULL) {
+ printf("Error opening '%s' for writing\n",
+ params->opt);
+ return;
+ }
+ printf("Total EEPROM length: %i bytes\n", len_eeprom);
+ info_eeprom.len = EEPROM_DUMP_CHUNKSIZE;
+ for (pos_eeprom = 0;
+ pos_eeprom < len_eeprom;
+ pos_eeprom += EEPROM_DUMP_CHUNKSIZE) {
+ info_eeprom.offset = pos_eeprom;
+ if (pos_eeprom + EEPROM_DUMP_CHUNKSIZE > len_eeprom)
+ info_eeprom.len = len_eeprom - pos_eeprom;
+ else
+ info_eeprom.len = EEPROM_DUMP_CHUNKSIZE;
+ stat = rte_ethtool_get_eeprom(
+ params->port, &info_eeprom, bytes_eeprom
+ );
+ if (stat != 0) {
+ printf("EEPROM read error %i\n", stat);
+ break;
+ }
+ if (fwrite(bytes_eeprom,
+ 1, info_eeprom.len,
+ fp_eeprom) != info_eeprom.len) {
+ printf("Error writing '%s'\n", params->opt);
+ break;
+ }
+ }
+ fclose(fp_eeprom);
+ } else if (len_eeprom == 0)
+ printf("Port %i: Device does not have EEPROM\n", params->port);
+ else if (len_eeprom == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error getting EEPROM\n", params->port);
+}
+
+
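+/*
+ * ptr_data distinguishes the two "pause" forms: non-NULL (pcmd_pause_noopt)
+ * queries the current pause state, NULL (pcmd_pause) applies the requested
+ * tx/rx pause setting with auto-negotiation assumed.
+ */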
+static void
+pcmd_pause_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ void *ptr_data)
+{
+ struct pcmd_intstr_params *params = ptr_params;
+ struct ethtool_pauseparam info;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ if (ptr_data != NULL) {
+ stat = rte_ethtool_get_pauseparam(params->port, &info);
+ } else {
+ memset(&info, 0, sizeof(info));
+ if (strcasecmp("all", params->opt) == 0) {
+ info.tx_pause = 1;
+ info.rx_pause = 1;
+ } else if (strcasecmp("tx", params->opt) == 0) {
+ info.tx_pause = 1;
+ info.rx_pause = 0;
+ } else if (strcasecmp("rx", params->opt) == 0) {
+ info.tx_pause = 0;
+ info.rx_pause = 1;
+ } else {
+ info.tx_pause = 0;
+ info.rx_pause = 0;
+ }
+ /* Assume auto-negotiation wanted */
+ info.autoneg = 1;
+ stat = rte_ethtool_set_pauseparam(params->port, &info);
+ }
+ if (stat == 0) {
+ if (info.rx_pause && info.tx_pause)
+ printf("Port %i: Tx & Rx Paused\n", params->port);
+ else if (info.rx_pause)
+ printf("Port %i: Rx Paused\n", params->port);
+ else if (info.tx_pause)
+ printf("Port %i: Tx Paused\n", params->port);
+ else
+ printf("Port %i: Tx & Rx not paused\n", params->port);
+ } else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error %i\n", params->port, stat);
+}
+
+
+static void
+pcmd_open_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_int_params *params = ptr_params;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ lock_port(params->port);
+ stat = rte_ethtool_net_open(params->port);
+ mark_port_active(params->port);
+ unlock_port(params->port);
+ if (stat == 0)
+ return;
+ else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error opening device\n", params->port);
+}
+
+static void
+pcmd_stop_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_int_params *params = ptr_params;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ lock_port(params->port);
+ stat = rte_ethtool_net_stop(params->port);
+ mark_port_inactive(params->port);
+ unlock_port(params->port);
+ if (stat == 0)
+ return;
+ else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error stopping device\n", params->port);
+}
+
+
+static void
+pcmd_rxmode_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_intstr_params *params = ptr_params;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ stat = rte_ethtool_net_set_rx_mode(params->port);
+ if (stat == 0)
+ return;
+ else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error setting rx mode\n", params->port);
+}
+
+
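+/*
+ * ptr_data selects the operation: non-NULL (pcmd_macaddr) sets the MAC
+ * address supplied on the command line, NULL (pcmd_macaddr_get) prints
+ * the current address.
+ */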
+static void
+pcmd_macaddr_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ void *ptr_data)
+{
+ struct pcmd_intmac_params *params = ptr_params;
+ struct ether_addr mac_addr;
+ int stat;
+
+ stat = 0;
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ if (ptr_data != NULL) {
+ lock_port(params->port);
+ stat = rte_ethtool_net_set_mac_addr(params->port,
+ &params->mac);
+ mark_port_newmac(params->port);
+ unlock_port(params->port);
+ if (stat == 0) {
+ printf("MAC address changed\n");
+ return;
+ }
+ } else {
+ stat = rte_ethtool_net_get_mac_addr(params->port, &mac_addr);
+ if (stat == 0) {
+ printf(
+ "Port %i MAC Address: %02x:%02x:%02x:%02x:%02x:%02x\n",
+ params->port,
+ mac_addr.addr_bytes[0],
+ mac_addr.addr_bytes[1],
+ mac_addr.addr_bytes[2],
+ mac_addr.addr_bytes[3],
+ mac_addr.addr_bytes[4],
+ mac_addr.addr_bytes[5]);
+ return;
+ }
+ }
+
+ printf("Port %i: Error %s\n", params->port,
+ strerror(-stat));
+}
+
+static void
+pcmd_mtu_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_intstr_params *params = ptr_params;
+ int stat;
+ int new_mtu;
+ char *ptr_parse_end;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ new_mtu = strtoul(params->opt, &ptr_parse_end, 10);
+ if (*ptr_parse_end != '\0' ||
+ new_mtu < ETHER_MIN_MTU ||
+ new_mtu > ETHER_MAX_JUMBO_FRAME_LEN) {
+ printf("Port %i: Invalid MTU value\n", params->port);
+ return;
+ }
+ stat = rte_ethtool_net_change_mtu(params->port, new_mtu);
+ if (stat == 0)
+ printf("Port %i: MTU set to %i\n", params->port, new_mtu);
+ else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error setting MTU\n", params->port);
+}
+
+
+
+static void pcmd_portstats_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_int_params *params = ptr_params;
+ struct rte_eth_stats stat_info;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ stat = rte_ethtool_net_get_stats64(params->port, &stat_info);
+ if (stat == 0) {
+ /* Most of rte_eth_stats is deprecated. */
+ printf("Port %i stats\n", params->port);
+ printf(" In: %" PRIu64 " (%" PRIu64 " bytes)\n"
+ " Out: %"PRIu64" (%"PRIu64 " bytes)\n"
+ " Err: %"PRIu64"\n",
+ stat_info.ipackets,
+ stat_info.ibytes,
+ stat_info.opackets,
+ stat_info.obytes,
+ stat_info.ierrors + stat_info.oerrors
+ );
+ } else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error fetching statistics\n", params->port);
+}
+
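+/*
+ * ptr_data selects the operation: NULL (pcmd_ringparam) prints the ring
+ * sizes, non-NULL (pcmd_ringparam_set) applies the requested tx/rx
+ * descriptor counts.
+ */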
+static void pcmd_ringparam_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ void *ptr_data)
+{
+ struct pcmd_intintint_params *params = ptr_params;
+ struct ethtool_ringparam ring_data;
+ struct ethtool_ringparam ring_params;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ if (ptr_data == NULL) {
+ stat = rte_ethtool_get_ringparam(params->port, &ring_data);
+ if (stat == 0) {
+ printf("Port %i ring parameters\n"
+ " Rx Pending: %i (%i max)\n"
+ " Tx Pending: %i (%i max)\n",
+ params->port,
+ ring_data.rx_pending,
+ ring_data.rx_max_pending,
+ ring_data.tx_pending,
+ ring_data.tx_max_pending);
+ }
+ } else {
+ if (params->tx < 1 || params->rx < 1) {
+ printf("Error: Invalid parameters\n");
+ return;
+ }
+ memset(&ring_params, 0, sizeof(struct ethtool_ringparam));
+ ring_params.tx_pending = params->tx;
+ ring_params.rx_pending = params->rx;
+ lock_port(params->port);
+ stat = rte_ethtool_set_ringparam(params->port, &ring_params);
+ unlock_port(params->port);
+ }
+ if (stat == 0)
+ return;
+ else if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else
+ printf("Port %i: Error fetching statistics\n", params->port);
+}
+
+static void pcmd_validate_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_intmac_params *params = ptr_params;
+
+ if (rte_ethtool_net_validate_addr(0, &params->mac))
+ printf("Address is unicast\n");
+ else
+ printf("Address is not unicast\n");
+}
+
+
+static void pcmd_vlan_callback(void *ptr_params,
+ __rte_unused struct cmdline *ctx,
+ __rte_unused void *ptr_data)
+{
+ struct pcmd_vlan_params *params = ptr_params;
+ int stat;
+
+ if (!rte_eth_dev_is_valid_port(params->port)) {
+ printf("Error: Invalid port number %i\n", params->port);
+ return;
+ }
+ stat = 0;
+
+ if (strcasecmp("add", params->mode) == 0) {
+ stat = rte_ethtool_net_vlan_rx_add_vid(
+ params->port, params->vid
+ );
+ if (stat == 0)
+ printf("VLAN vid %i added\n", params->vid);
+
+ } else if (strcasecmp("del", params->mode) == 0) {
+ stat = rte_ethtool_net_vlan_rx_kill_vid(
+ params->port, params->vid
+ );
+ if (stat == 0)
+ printf("VLAN vid %i removed\n", params->vid);
+ } else {
+ /* Should not happen! */
+ printf("Error: Bad mode %s\n", params->mode);
+ }
+ if (stat == -ENOTSUP)
+ printf("Port %i: Operation not supported\n", params->port);
+ else if (stat == -ENOSYS)
+ printf("Port %i: VLAN filtering disabled\n", params->port);
+ else if (stat != 0)
+ printf("Port %i: Error changing VLAN setup (code %i)\n",
+ params->port, -stat);
+}
+
+
+cmdline_parse_inst_t pcmd_quit = {
+ .f = pcmd_quit_callback,
+ .data = NULL,
+ .help_str = "quit\n Exit program",
+ .tokens = {(void *)&pcmd_quit_token_cmd, NULL},
+};
+cmdline_parse_inst_t pcmd_drvinfo = {
+ .f = pcmd_drvinfo_callback,
+ .data = NULL,
+ .help_str = "drvinfo\n Print driver info",
+ .tokens = {(void *)&pcmd_drvinfo_token_cmd, NULL},
+};
+cmdline_parse_inst_t pcmd_link = {
+ .f = pcmd_link_callback,
+ .data = NULL,
+ .help_str = "link\n Print port link states",
+ .tokens = {(void *)&pcmd_link_token_cmd, NULL},
+};
+cmdline_parse_inst_t pcmd_regs = {
+ .f = pcmd_regs_callback,
+ .data = NULL,
+ .help_str = "regs <port_id> <filename>\n"
+ " Dump port register(s) to file",
+ .tokens = {
+ (void *)&pcmd_regs_token_cmd,
+ (void *)&pcmd_intstr_token_port,
+ (void *)&pcmd_intstr_token_opt,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_eeprom = {
+ .f = pcmd_eeprom_callback,
+ .data = NULL,
+ .help_str = "eeprom <port_id> <filename>\n Dump EEPROM to file",
+ .tokens = {
+ (void *)&pcmd_eeprom_token_cmd,
+ (void *)&pcmd_intstr_token_port,
+ (void *)&pcmd_intstr_token_opt,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_pause_noopt = {
+ .f = pcmd_pause_callback,
+ .data = (void *)0x01,
+ .help_str = "pause <port_id>\n Print port pause state",
+ .tokens = {
+ (void *)&pcmd_pause_token_cmd,
+ (void *)&pcmd_pause_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_pause = {
+ .f = pcmd_pause_callback,
+ .data = NULL,
+ .help_str =
+ "pause <port_id> <all|tx|rx|none>\n Pause/unpause port",
+ .tokens = {
+ (void *)&pcmd_pause_token_cmd,
+ (void *)&pcmd_pause_token_port,
+ (void *)&pcmd_pause_token_opt,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_open = {
+ .f = pcmd_open_callback,
+ .data = NULL,
+ .help_str = "open <port_id>\n Open port",
+ .tokens = {
+ (void *)&pcmd_open_token_cmd,
+ (void *)&pcmd_int_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_stop = {
+ .f = pcmd_stop_callback,
+ .data = NULL,
+ .help_str = "stop <port_id>\n Stop port",
+ .tokens = {
+ (void *)&pcmd_stop_token_cmd,
+ (void *)&pcmd_int_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_rxmode = {
+ .f = pcmd_rxmode_callback,
+ .data = NULL,
+ .help_str = "rxmode <port_id>\n Toggle port Rx mode",
+ .tokens = {
+ (void *)&pcmd_rxmode_token_cmd,
+ (void *)&pcmd_int_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_macaddr_get = {
+ .f = pcmd_macaddr_callback,
+ .data = NULL,
+ .help_str = "macaddr <port_id>\n"
+ " Get MAC address",
+ .tokens = {
+ (void *)&pcmd_macaddr_token_cmd,
+ (void *)&pcmd_intstr_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_macaddr = {
+ .f = pcmd_macaddr_callback,
+ .data = (void *)0x01,
+ .help_str =
+ "macaddr <port_id> <mac_addr>\n"
+ " Set MAC address",
+ .tokens = {
+ (void *)&pcmd_macaddr_token_cmd,
+ (void *)&pcmd_intmac_token_port,
+ (void *)&pcmd_intmac_token_mac,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_mtu = {
+ .f = pcmd_mtu_callback,
+ .data = NULL,
+ .help_str = "mtu <port_id> <mtu_value>\n"
+ " Change MTU",
+ .tokens = {
+ (void *)&pcmd_mtu_token_cmd,
+ (void *)&pcmd_intstr_token_port,
+ (void *)&pcmd_intstr_token_opt,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_portstats = {
+ .f = pcmd_portstats_callback,
+ .data = NULL,
+ .help_str = "portstats <port_id>\n"
+ " Print port eth statistics",
+ .tokens = {
+ (void *)&pcmd_portstats_token_cmd,
+ (void *)&pcmd_int_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_ringparam = {
+ .f = pcmd_ringparam_callback,
+ .data = NULL,
+ .help_str = "ringparam <port_id>\n"
+ " Print ring parameters",
+ .tokens = {
+ (void *)&pcmd_ringparam_token_cmd,
+ (void *)&pcmd_intintint_token_port,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_ringparam_set = {
+ .f = pcmd_ringparam_callback,
+ .data = (void *)1,
+ .help_str = "ringparam <port_id> <tx_param> <rx_param>\n"
+ " Set ring parameters",
+ .tokens = {
+ (void *)&pcmd_ringparam_token_cmd,
+ (void *)&pcmd_intintint_token_port,
+ (void *)&pcmd_intintint_token_tx,
+ (void *)&pcmd_intintint_token_rx,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_validate = {
+ .f = pcmd_validate_callback,
+ .data = NULL,
+ .help_str = "validate <mac_addr>\n"
+ " Check that MAC address is valid unicast address",
+ .tokens = {
+ (void *)&pcmd_validate_token_cmd,
+ (void *)&pcmd_intmac_token_mac,
+ NULL
+ },
+};
+cmdline_parse_inst_t pcmd_vlan = {
+ .f = pcmd_vlan_callback,
+ .data = NULL,
+ .help_str = "vlan <port_id> <add|del> <vlan_id>\n"
+ " Add/remove VLAN id",
+ .tokens = {
+ (void *)&pcmd_vlan_token_cmd,
+ (void *)&pcmd_vlan_token_port,
+ (void *)&pcmd_vlan_token_mode,
+ (void *)&pcmd_vlan_token_vid,
+ NULL
+ },
+};
+
+
+cmdline_parse_ctx_t list_prompt_commands[] = {
+ (cmdline_parse_inst_t *)&pcmd_drvinfo,
+ (cmdline_parse_inst_t *)&pcmd_eeprom,
+ (cmdline_parse_inst_t *)&pcmd_link,
+ (cmdline_parse_inst_t *)&pcmd_macaddr_get,
+ (cmdline_parse_inst_t *)&pcmd_macaddr,
+ (cmdline_parse_inst_t *)&pcmd_mtu,
+ (cmdline_parse_inst_t *)&pcmd_open,
+ (cmdline_parse_inst_t *)&pcmd_pause_noopt,
+ (cmdline_parse_inst_t *)&pcmd_pause,
+ (cmdline_parse_inst_t *)&pcmd_portstats,
+ (cmdline_parse_inst_t *)&pcmd_regs,
+ (cmdline_parse_inst_t *)&pcmd_ringparam,
+ (cmdline_parse_inst_t *)&pcmd_ringparam_set,
+ (cmdline_parse_inst_t *)&pcmd_rxmode,
+ (cmdline_parse_inst_t *)&pcmd_stop,
+ (cmdline_parse_inst_t *)&pcmd_validate,
+ (cmdline_parse_inst_t *)&pcmd_vlan,
+ (cmdline_parse_inst_t *)&pcmd_quit,
+ NULL
+};
+
+
+void ethapp_main(void)
+{
+ struct cmdline *ctx_cmdline;
+
+ ctx_cmdline = cmdline_stdin_new(list_prompt_commands, "EthApp> ");
+ cmdline_interact(ctx_cmdline);
+ cmdline_stdin_exit(ctx_cmdline);
+}
diff --git a/examples/ethtool/ethtool-app/ethapp.h b/examples/ethtool/ethtool-app/ethapp.h
new file mode 100644
index 00000000..ba438eea
--- /dev/null
+++ b/examples/ethtool/ethtool-app/ethapp.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+void ethapp_main(void);
+void print_stats(void);
+void lock_port(int idx_port);
+void unlock_port(int idx_port);
+void mark_port_inactive(int idx_port);
+void mark_port_active(int idx_port);
+void mark_port_newmac(int idx_port);
diff --git a/examples/ethtool/ethtool-app/main.c b/examples/ethtool/ethtool-app/main.c
new file mode 100644
index 00000000..2c655d83
--- /dev/null
+++ b/examples/ethtool/ethtool-app/main.c
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <rte_common.h>
+#include <rte_spinlock.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include "ethapp.h"
+
+#define MAX_PORTS RTE_MAX_ETHPORTS
+#define MAX_BURST_LENGTH 32
+#define PORT_RX_QUEUE_SIZE 128
+#define PORT_TX_QUEUE_SIZE 256
+#define PKTPOOL_EXTRA_SIZE 512
+#define PKTPOOL_CACHE 32
+
+
+struct txq_port {
+ uint16_t cnt_unsent;
+ struct rte_mbuf *buf_frames[MAX_BURST_LENGTH];
+};
+
+struct app_port {
+ struct ether_addr mac_addr;
+ struct txq_port txq;
+ rte_spinlock_t lock;
+ int port_active;
+ int port_dirty;
+ int idx_port;
+ struct rte_mempool *pkt_pool;
+};
+
+struct app_config {
+ struct app_port ports[MAX_PORTS];
+ int cnt_ports;
+ int exit_now;
+};
+
+
+struct app_config app_cfg;
+
+
+void lock_port(int idx_port)
+{
+ struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+ rte_spinlock_lock(&ptr_port->lock);
+}
+
+void unlock_port(int idx_port)
+{
+ struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+ rte_spinlock_unlock(&ptr_port->lock);
+}
+
+void mark_port_active(int idx_port)
+{
+ struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+ ptr_port->port_active = 1;
+}
+
+void mark_port_inactive(int idx_port)
+{
+ struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+ ptr_port->port_active = 0;
+}
+
+void mark_port_newmac(int idx_port)
+{
+ struct app_port *ptr_port = &app_cfg.ports[idx_port];
+
+ ptr_port->port_dirty = 1;
+}
+
+static void setup_ports(struct app_config *app_cfg, int cnt_ports)
+{
+ int idx_port;
+ int size_pktpool;
+ struct rte_eth_conf cfg_port;
+ struct rte_eth_dev_info dev_info;
+ char str_name[16];
+
+ memset(&cfg_port, 0, sizeof(cfg_port));
+ cfg_port.txmode.mq_mode = ETH_MQ_TX_NONE;
+
+ for (idx_port = 0; idx_port < cnt_ports; idx_port++) {
+ struct app_port *ptr_port = &app_cfg->ports[idx_port];
+
+ rte_eth_dev_info_get(idx_port, &dev_info);
+ size_pktpool = dev_info.rx_desc_lim.nb_max +
+ dev_info.tx_desc_lim.nb_max + PKTPOOL_EXTRA_SIZE;
+
+ snprintf(str_name, 16, "pkt_pool%i", idx_port);
+ ptr_port->pkt_pool = rte_pktmbuf_pool_create(
+ str_name,
+ size_pktpool, PKTPOOL_CACHE,
+ 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_socket_id()
+ );
+ if (ptr_port->pkt_pool == NULL)
+ rte_exit(EXIT_FAILURE,
+ "rte_pktmbuf_pool_create failed"
+ );
+
+ printf("Init port %i..\n", idx_port);
+ ptr_port->port_active = 1;
+ ptr_port->port_dirty = 0;
+ ptr_port->idx_port = idx_port;
+
+ if (rte_eth_dev_configure(idx_port, 1, 1, &cfg_port) < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_dev_configure failed");
+ if (rte_eth_rx_queue_setup(
+ idx_port, 0, PORT_RX_QUEUE_SIZE,
+ rte_eth_dev_socket_id(idx_port), NULL,
+ ptr_port->pkt_pool) < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup failed"
+ );
+ if (rte_eth_tx_queue_setup(
+ idx_port, 0, PORT_TX_QUEUE_SIZE,
+ rte_eth_dev_socket_id(idx_port), NULL) < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup failed"
+ );
+ if (rte_eth_dev_start(idx_port) < 0)
+ rte_exit(EXIT_FAILURE,
+ "%s:%i: rte_eth_dev_start failed",
+ __FILE__, __LINE__
+ );
+ rte_eth_promiscuous_enable(idx_port);
+ rte_eth_macaddr_get(idx_port, &ptr_port->mac_addr);
+ rte_spinlock_init(&ptr_port->lock);
+ }
+}
+
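+/*
+ * Reflect a frame back to its sender: the original source MAC becomes the
+ * destination and the port's own MAC becomes the new source.
+ */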
+static void process_frame(struct app_port *ptr_port,
+ struct rte_mbuf *ptr_frame)
+{
+ struct ether_hdr *ptr_mac_hdr;
+
+ ptr_mac_hdr = rte_pktmbuf_mtod(ptr_frame, struct ether_hdr *);
+ ether_addr_copy(&ptr_mac_hdr->s_addr, &ptr_mac_hdr->d_addr);
+ ether_addr_copy(&ptr_port->mac_addr, &ptr_mac_hdr->s_addr);
+}
+
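+/*
+ * Forwarding loop run on a slave lcore: poll every active, unlocked port,
+ * rewrite the MAC addresses of received frames and transmit them back out
+ * of the same port, carrying unsent frames over to the next iteration.
+ */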
+static int slave_main(__attribute__((unused)) void *ptr_data)
+{
+ struct app_port *ptr_port;
+ struct rte_mbuf *ptr_frame;
+ struct txq_port *txq;
+
+ uint16_t cnt_recv_frames;
+ uint16_t idx_frame;
+ uint16_t cnt_sent;
+ uint16_t idx_port;
+ uint16_t lock_result;
+
+ while (app_cfg.exit_now == 0) {
+ for (idx_port = 0; idx_port < app_cfg.cnt_ports; idx_port++) {
+ /* Check that port is active and unlocked */
+ ptr_port = &app_cfg.ports[idx_port];
+ lock_result = rte_spinlock_trylock(&ptr_port->lock);
+ if (lock_result == 0)
+ continue;
+ if (ptr_port->port_active == 0) {
+ rte_spinlock_unlock(&ptr_port->lock);
+ continue;
+ }
+ txq = &ptr_port->txq;
+
+ /* MAC address was updated */
+ if (ptr_port->port_dirty == 1) {
+ rte_eth_macaddr_get(ptr_port->idx_port,
+ &ptr_port->mac_addr);
+ ptr_port->port_dirty = 0;
+ }
+
+ /* Incoming frames */
+ cnt_recv_frames = rte_eth_rx_burst(
+ ptr_port->idx_port, 0,
+ &txq->buf_frames[txq->cnt_unsent],
+ RTE_DIM(txq->buf_frames) - txq->cnt_unsent
+ );
+ if (cnt_recv_frames > 0) {
+ for (idx_frame = 0;
+ idx_frame < cnt_recv_frames;
+ idx_frame++) {
+ ptr_frame = txq->buf_frames[
+ idx_frame + txq->cnt_unsent];
+ process_frame(ptr_port, ptr_frame);
+ }
+ txq->cnt_unsent += cnt_recv_frames;
+ }
+
+ /* Outgoing frames */
+ if (txq->cnt_unsent > 0) {
+ cnt_sent = rte_eth_tx_burst(
+ ptr_port->idx_port, 0,
+ txq->buf_frames,
+ txq->cnt_unsent
+ );
+ /* Shuffle up unsent frame pointers */
+ for (idx_frame = cnt_sent;
+ idx_frame < txq->cnt_unsent;
+ idx_frame++)
+ txq->buf_frames[idx_frame - cnt_sent] =
+ txq->buf_frames[idx_frame];
+ txq->cnt_unsent -= cnt_sent;
+ }
+ rte_spinlock_unlock(&ptr_port->lock);
+ } /* end for (idx_port) */
+ } /* end while (!exit_now) */
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int cnt_args_parsed;
+ uint32_t id_core;
+ uint32_t cnt_ports;
+
+ /* Init runtime environment */
+ cnt_args_parsed = rte_eal_init(argc, argv);
+ if (cnt_args_parsed < 0)
+ rte_exit(EXIT_FAILURE, "rte_eal_init(): Failed");
+
+ cnt_ports = rte_eth_dev_count();
+ printf("Number of NICs: %i\n", cnt_ports);
+ if (cnt_ports == 0)
+ rte_exit(EXIT_FAILURE, "No available NIC ports!\n");
+ if (cnt_ports > MAX_PORTS) {
+ printf("Info: Using only %i of %i ports\n",
+ cnt_ports, MAX_PORTS
+ );
+ cnt_ports = MAX_PORTS;
+ }
+
+ setup_ports(&app_cfg, cnt_ports);
+
+ app_cfg.exit_now = 0;
+ app_cfg.cnt_ports = cnt_ports;
+
+ if (rte_lcore_count() < 2)
+ rte_exit(EXIT_FAILURE, "No available slave core!\n");
+ /* Launch the forwarding loop on the first slave lcore */
+ id_core = rte_lcore_id();
+ id_core = rte_get_next_lcore(id_core, 1, 1);
+ rte_eal_remote_launch(slave_main, NULL, id_core);
+
+ ethapp_main();
+
+ app_cfg.exit_now = 1;
+ RTE_LCORE_FOREACH_SLAVE(id_core) {
+ if (rte_eal_wait_lcore(id_core) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ethtool/lib/Makefile b/examples/ethtool/lib/Makefile
new file mode 100644
index 00000000..d7ee9555
--- /dev/null
+++ b/examples/ethtool/lib/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overwritten by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# library name
+LIB = librte_ethtool.a
+
+LIBABIVER := 1
+
+# all sources are stored in SRCS-y
+SRCS-y := rte_ethtool.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extlib.mk
diff --git a/examples/ethtool/lib/rte_ethtool.c b/examples/ethtool/lib/rte_ethtool.c
new file mode 100644
index 00000000..42e05f1f
--- /dev/null
+++ b/examples/ethtool/lib/rte_ethtool.c
@@ -0,0 +1,423 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <rte_version.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include "rte_ethtool.h"
+
+#define PKTPOOL_SIZE 512
+#define PKTPOOL_CACHE 32
+
+
+int
+rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo)
+{
+ struct rte_eth_dev_info dev_info;
+ int n;
+
+ if (drvinfo == NULL)
+ return -EINVAL;
+
+ if (!rte_eth_dev_is_valid_port(port_id))
+ return -ENODEV;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ rte_eth_dev_info_get(port_id, &dev_info);
+
+ snprintf(drvinfo->driver, sizeof(drvinfo->driver), "%s",
+ dev_info.driver_name);
+ snprintf(drvinfo->version, sizeof(drvinfo->version), "%s",
+ rte_version());
+ snprintf(drvinfo->bus_info, sizeof(drvinfo->bus_info),
+ "%04x:%02x:%02x.%x",
+ dev_info.pci_dev->addr.domain, dev_info.pci_dev->addr.bus,
+ dev_info.pci_dev->addr.devid, dev_info.pci_dev->addr.function);
+
+ n = rte_eth_dev_get_reg_length(port_id);
+ if (n > 0)
+ drvinfo->regdump_len = n;
+ else
+ drvinfo->regdump_len = 0;
+
+ n = rte_eth_dev_get_eeprom_length(port_id);
+ if (n > 0)
+ drvinfo->eedump_len = n;
+ else
+ drvinfo->eedump_len = 0;
+
+ drvinfo->n_stats = sizeof(struct rte_eth_stats) / sizeof(uint64_t);
+ drvinfo->testinfo_len = 0;
+
+ return 0;
+}
+
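+/*
+ * The register count reported by rte_eth_dev_get_reg_length() is converted
+ * to a byte length (one 32-bit word per register); negative values are
+ * passed through unchanged as error codes.
+ */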
+int
+rte_ethtool_get_regs_len(uint8_t port_id)
+{
+ int count_regs;
+
+ count_regs = rte_eth_dev_get_reg_length(port_id);
+ if (count_regs > 0)
+ return count_regs * sizeof(uint32_t);
+ return count_regs;
+}
+
+int
+rte_ethtool_get_regs(uint8_t port_id, struct ethtool_regs *regs, void *data)
+{
+ struct rte_dev_reg_info reg_info;
+ int status;
+
+ if (regs == NULL || data == NULL)
+ return -EINVAL;
+
+ reg_info.data = data;
+ reg_info.length = 0;
+
+ status = rte_eth_dev_get_reg_info(port_id, &reg_info);
+ if (status)
+ return status;
+ regs->version = reg_info.version;
+
+ return 0;
+}
+
+int
+rte_ethtool_get_link(uint8_t port_id)
+{
+ struct rte_eth_link link;
+
+ if (!rte_eth_dev_is_valid_port(port_id))
+ return -ENODEV;
+ rte_eth_link_get(port_id, &link);
+ return link.link_status;
+}
+
+int
+rte_ethtool_get_eeprom_len(uint8_t port_id)
+{
+ return rte_eth_dev_get_eeprom_length(port_id);
+}
+
+int
+rte_ethtool_get_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+ void *words)
+{
+ struct rte_dev_eeprom_info eeprom_info;
+ int status;
+
+ if (eeprom == NULL || words == NULL)
+ return -EINVAL;
+
+ eeprom_info.offset = eeprom->offset;
+ eeprom_info.length = eeprom->len;
+ eeprom_info.data = words;
+
+ status = rte_eth_dev_get_eeprom(port_id, &eeprom_info);
+ if (status)
+ return status;
+
+ eeprom->magic = eeprom_info.magic;
+
+ return 0;
+}
+
+int
+rte_ethtool_set_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+ void *words)
+{
+ struct rte_dev_eeprom_info eeprom_info;
+ int status;
+
+ if (eeprom == NULL || words == NULL || eeprom->offset >= eeprom->len)
+ return -EINVAL;
+
+ eeprom_info.offset = eeprom->offset;
+ eeprom_info.length = eeprom->len;
+ eeprom_info.data = words;
+
+ status = rte_eth_dev_set_eeprom(port_id, &eeprom_info);
+ if (status)
+ return status;
+
+ eeprom->magic = eeprom_info.magic;
+
+ return 0;
+}
+
+int
+rte_ethtool_get_pauseparam(uint8_t port_id,
+ struct ethtool_pauseparam *pause_param)
+{
+ struct rte_eth_fc_conf fc_conf;
+ int status;
+
+ if (pause_param == NULL)
+ return -EINVAL;
+
+ status = rte_eth_dev_flow_ctrl_get(port_id, &fc_conf);
+ if (status)
+ return status;
+
+ pause_param->tx_pause = 0;
+ pause_param->rx_pause = 0;
+ switch (fc_conf.mode) {
+ case RTE_FC_RX_PAUSE:
+ pause_param->rx_pause = 1;
+ break;
+ case RTE_FC_TX_PAUSE:
+ pause_param->tx_pause = 1;
+ break;
+ case RTE_FC_FULL:
+ pause_param->rx_pause = 1;
+ pause_param->tx_pause = 1;
+ break;
+ default:
+ /* dummy block to avoid compiler warning */
+ break;
+ }
+ pause_param->autoneg = (uint32_t)fc_conf.autoneg;
+
+ return 0;
+}
+
+int
+rte_ethtool_set_pauseparam(uint8_t port_id,
+ struct ethtool_pauseparam *pause_param)
+{
+ struct rte_eth_fc_conf fc_conf;
+ int status;
+
+ if (pause_param == NULL)
+ return -EINVAL;
+
+ /*
+ * Read the device's current flow control parameters first, since the
+ * ethtool set_pauseparam op does not carry all of the information
+ * defined in struct rte_eth_fc_conf.
+ * This API requires the device to support both
+ * rte_eth_dev_flow_ctrl_get and rte_eth_dev_flow_ctrl_set;
+ * otherwise -ENOTSUP is returned.
+ */
+ status = rte_eth_dev_flow_ctrl_get(port_id, &fc_conf);
+ if (status)
+ return status;
+
+ fc_conf.autoneg = (uint8_t)pause_param->autoneg;
+
+ if (pause_param->tx_pause) {
+ if (pause_param->rx_pause)
+ fc_conf.mode = RTE_FC_FULL;
+ else
+ fc_conf.mode = RTE_FC_TX_PAUSE;
+ } else {
+ if (pause_param->rx_pause)
+ fc_conf.mode = RTE_FC_RX_PAUSE;
+ else
+ fc_conf.mode = RTE_FC_NONE;
+ }
+
+ status = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
+ if (status)
+ return status;
+
+ return 0;
+}
+
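+/* Mimic ethtool's net open: restart the port by stopping and then starting it. */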
+int
+rte_ethtool_net_open(uint8_t port_id)
+{
+ rte_eth_dev_stop(port_id);
+
+ return rte_eth_dev_start(port_id);
+}
+
+int
+rte_ethtool_net_stop(uint8_t port_id)
+{
+ if (!rte_eth_dev_is_valid_port(port_id))
+ return -ENODEV;
+ rte_eth_dev_stop(port_id);
+
+ return 0;
+}
+
+int
+rte_ethtool_net_get_mac_addr(uint8_t port_id, struct ether_addr *addr)
+{
+ if (!rte_eth_dev_is_valid_port(port_id))
+ return -ENODEV;
+ if (addr == NULL)
+ return -EINVAL;
+ rte_eth_macaddr_get(port_id, addr);
+
+ return 0;
+}
+
+int
+rte_ethtool_net_set_mac_addr(uint8_t port_id, struct ether_addr *addr)
+{
+ if (addr == NULL)
+ return -EINVAL;
+ return rte_eth_dev_default_mac_addr_set(port_id, addr);
+}
+
+int
+rte_ethtool_net_validate_addr(uint8_t port_id __rte_unused,
+ struct ether_addr *addr)
+{
+ if (addr == NULL)
+ return -EINVAL;
+ return is_valid_assigned_ether_addr(addr);
+}
+
+int
+rte_ethtool_net_change_mtu(uint8_t port_id, int mtu)
+{
+ if (mtu < 0 || mtu > UINT16_MAX)
+ return -EINVAL;
+ return rte_eth_dev_set_mtu(port_id, (uint16_t)mtu);
+}
+
+int
+rte_ethtool_net_get_stats64(uint8_t port_id, struct rte_eth_stats *stats)
+{
+ if (stats == NULL)
+ return -EINVAL;
+ return rte_eth_stats_get(port_id, stats);
+}
+
+int
+rte_ethtool_net_vlan_rx_add_vid(uint8_t port_id, uint16_t vid)
+{
+ return rte_eth_dev_vlan_filter(port_id, vid, 1);
+}
+
+int
+rte_ethtool_net_vlan_rx_kill_vid(uint8_t port_id, uint16_t vid)
+{
+ return rte_eth_dev_vlan_filter(port_id, vid, 0);
+}
+
+/*
+ * The set_rx_mode op provides driver-specific rx mode setting.
+ * This implementation follows the behaviour of the ixgbe/igb
+ * drivers. A further improvement would be to provide a callback
+ * op field in struct rte_eth_dev::dev_ops so that each driver
+ * can register a device-specific implementation.
+ */
+int
+rte_ethtool_net_set_rx_mode(uint8_t port_id)
+{
+ uint16_t num_vfs;
+ struct rte_eth_dev_info dev_info;
+ uint16_t vf;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ rte_eth_dev_info_get(port_id, &dev_info);
+ num_vfs = dev_info.max_vfs;
+
+	/* Set each VF's rx mode; failures from unsupported VFs are ignored */
+ for (vf = 0; vf < num_vfs; vf++)
+ rte_eth_dev_set_vf_rxmode(port_id, vf,
+ ETH_VMDQ_ACCEPT_UNTAG, 0);
+
+	/* Enable Rx VLAN filter; failures from unsupported VFs are ignored */
+ rte_eth_dev_set_vlan_offload(port_id, ETH_VLAN_FILTER_MASK);
+
+ return 0;
+}
+
+
+int
+rte_ethtool_get_ringparam(uint8_t port_id,
+ struct ethtool_ringparam *ring_param)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_rxq_info rx_qinfo;
+ struct rte_eth_txq_info tx_qinfo;
+ int stat;
+
+ if (ring_param == NULL)
+ return -EINVAL;
+
+ rte_eth_dev_info_get(port_id, &dev_info);
+
+ stat = rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo);
+ if (stat != 0)
+ return stat;
+
+ stat = rte_eth_tx_queue_info_get(port_id, 0, &tx_qinfo);
+ if (stat != 0)
+ return stat;
+
+ memset(ring_param, 0, sizeof(*ring_param));
+ ring_param->rx_pending = rx_qinfo.nb_desc;
+ ring_param->rx_max_pending = dev_info.rx_desc_lim.nb_max;
+ ring_param->tx_pending = tx_qinfo.nb_desc;
+ ring_param->tx_max_pending = dev_info.tx_desc_lim.nb_max;
+
+ return 0;
+}
+
+
+int
+rte_ethtool_set_ringparam(uint8_t port_id,
+ struct ethtool_ringparam *ring_param)
+{
+ struct rte_eth_rxq_info rx_qinfo;
+ int stat;
+
+ if (ring_param == NULL)
+ return -EINVAL;
+
+ stat = rte_eth_rx_queue_info_get(port_id, 0, &rx_qinfo);
+ if (stat != 0)
+ return stat;
+
+ rte_eth_dev_stop(port_id);
+
+ stat = rte_eth_tx_queue_setup(port_id, 0, ring_param->tx_pending,
+ rte_socket_id(), NULL);
+ if (stat != 0)
+ return stat;
+
+ stat = rte_eth_rx_queue_setup(port_id, 0, ring_param->rx_pending,
+ rte_socket_id(), NULL, rx_qinfo.mp);
+ if (stat != 0)
+ return stat;
+
+ return rte_eth_dev_start(port_id);
+}
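
The pause-parameter handlers above translate between the ethtool rx_pause/tx_pause flags and DPDK's single flow-control mode value. The following is a minimal standalone sketch of that mapping only; the names fc_mode and map_pause_flags are illustrative and not part of this patch.

/* Standalone sketch of the pause-flag to flow-control-mode mapping
 * used by rte_ethtool_set_pauseparam() above; names are illustrative. */
#include <stdio.h>

enum fc_mode { FC_NONE, FC_RX_PAUSE, FC_TX_PAUSE, FC_FULL };

static enum fc_mode
map_pause_flags(int rx_pause, int tx_pause)
{
	if (tx_pause)
		return rx_pause ? FC_FULL : FC_TX_PAUSE;
	return rx_pause ? FC_RX_PAUSE : FC_NONE;
}

int main(void)
{
	/* all four combinations, mirroring the switch in get_pauseparam */
	printf("%d %d %d %d\n",
	       map_pause_flags(0, 0),  /* FC_NONE     */
	       map_pause_flags(1, 0),  /* FC_RX_PAUSE */
	       map_pause_flags(0, 1),  /* FC_TX_PAUSE */
	       map_pause_flags(1, 1)); /* FC_FULL     */
	return 0;
}
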
diff --git a/examples/ethtool/lib/rte_ethtool.h b/examples/ethtool/lib/rte_ethtool.h
new file mode 100644
index 00000000..2e79d453
--- /dev/null
+++ b/examples/ethtool/lib/rte_ethtool.h
@@ -0,0 +1,410 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_ETHTOOL_H_
+#define _RTE_ETHTOOL_H_
+
+/*
+ * This new interface is designed to provide a user-space shim layer for
+ * the Ethtool and Netdevice op APIs.
+ *
+ * rte_ethtool_get_driver: ethtool_ops::get_driverinfo
+ * rte_ethtool_get_link: ethtool_ops::get_link
+ * rte_ethtool_get_regs_len: ethtool_ops::get_regs_len
+ * rte_ethtool_get_regs: ethtool_ops::get_regs
+ * rte_ethtool_get_eeprom_len: ethtool_ops::get_eeprom_len
+ * rte_ethtool_get_eeprom: ethtool_ops::get_eeprom
+ * rte_ethtool_set_eeprom: ethtool_ops::set_eeprom
+ * rte_ethtool_get_pauseparam: ethtool_ops::get_pauseparam
+ * rte_ethtool_set_pauseparam: ethtool_ops::set_pauseparam
+ *
+ * rte_ethtool_net_open: net_device_ops::ndo_open
+ * rte_ethtool_net_stop: net_device_ops::ndo_stop
+ * rte_ethtool_net_set_mac_addr: net_device_ops::ndo_set_mac_address
+ * rte_ethtool_net_validate_addr: net_device_ops::ndo_validate_addr
+ * rte_ethtool_net_change_mtu:      net_device_ops::ndo_change_mtu
+ * rte_ethtool_net_get_stats64:     net_device_ops::ndo_get_stats64
+ * rte_ethtool_net_vlan_rx_add_vid: net_device_ops::ndo_vlan_rx_add_vid
+ * rte_ethtool_net_vlan_rx_kill_vid: net_device_ops::ndo_vlan_rx_kill_vid
+ * rte_ethtool_net_set_rx_mode:     net_device_ops::ndo_set_rx_mode
+ *
+ */
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+#include <rte_ethdev.h>
+#include <linux/ethtool.h>
+
+/**
+ * Retrieve the Ethernet device driver information according to
+ * attributes described by ethtool data structure, ethtool_drvinfo.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param drvinfo
+ *   A pointer to an ethtool_drvinfo structure to be filled with the driver information
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_ethtool_get_drvinfo(uint8_t port_id, struct ethtool_drvinfo *drvinfo);
+
+/**
+ * Retrieve the Ethernet device register length in bytes.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (> 0) # of device registers (in bytes) available for dump
+ * - (0) no registers available for dump.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_regs_len(uint8_t port_id);
+
+/**
+ * Retrieve the Ethernet device register information according to
+ * attributes described by ethtool data structure, ethtool_regs
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param reg
+ * A pointer to ethtool_regs that has register information
+ * @param data
+ * A pointer to a buffer that is used to retrieve device register content
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_regs(uint8_t port_id, struct ethtool_regs *regs,
+ void *data);
+
+/**
+ * Retrieve the Ethernet device link status
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (1) if link up.
+ * - (0) if link down.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_link(uint8_t port_id);
+
+/**
+ * Retrieve the Ethernet device EEPROM size
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (> 0) device EEPROM size in bytes
+ * - (0) device has NO EEPROM
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_eeprom_len(uint8_t port_id);
+
+/**
+ * Retrieve EEPROM content based upon eeprom range described in ethtool
+ * data structure, ethtool_eeprom
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param eeprom
+ * The pointer of ethtool_eeprom that provides eeprom range
+ * @param words
+ * A buffer that holds data read from eeprom
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+ void *words);
+
+/**
+ * Setting EEPROM content based upon eeprom range described in ethtool
+ * data structure, ethtool_eeprom
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param eeprom
+ * The pointer of ethtool_eeprom that provides eeprom range
+ * @param words
+ * A buffer that holds data to be written into eeprom
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_set_eeprom(uint8_t port_id, struct ethtool_eeprom *eeprom,
+ void *words);
+
+/**
+ * Retrieve the Ethernet device pause frame configuration according to
+ * parameter attributes described by ethtool data structure,
+ * ethtool_pauseparam.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param pause_param
+ *   A pointer to an ethtool_pauseparam structure that receives the
+ *   pause frame configuration parameters
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_get_pauseparam(uint8_t port_id,
+ struct ethtool_pauseparam *pause_param);
+
+/**
+ * Setting the Ethernet device pause frame configuration according to
+ * parameter attributes described by ethtool data structure, ethtool_pauseparam.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param pause_param
+ *   A pointer to an ethtool_pauseparam structure with the pause frame configuration to apply
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_set_pauseparam(uint8_t port_id,
+ struct ethtool_pauseparam *param);
+
+/**
+ * Start the Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_open(uint8_t port_id);
+
+/**
+ * Stop the Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_ethtool_net_stop(uint8_t port_id);
+
+/**
+ * Get the Ethernet device MAC address.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param addr
+ * MAC address of the Ethernet device.
+ * @return
+ * - (0) if successful.
+ * - (-ENODEV) if *port_id* invalid.
+ */
+int rte_ethtool_net_get_mac_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Setting the Ethernet device MAC address.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param addr
+ * The new MAC addr.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_set_mac_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Validate if the provided MAC address is a valid unicast address
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param addr
+ * A pointer to a buffer (6-byte, 48bit) for the target MAC address
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_validate_addr(uint8_t port_id, struct ether_addr *addr);
+
+/**
+ * Setting the Ethernet device maximum transmission unit (MTU).
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param mtu
+ * New MTU
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_change_mtu(uint8_t port_id, int mtu);
+
+/**
+ * Retrieve the Ethernet device traffic statistics
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param stats
+ * A pointer to struct rte_eth_stats for statistics parameters
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ * - (-EINVAL) if parameters invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_get_stats64(uint8_t port_id, struct rte_eth_stats *stats);
+
+/**
+ * Update the Ethernet device VLAN filter with new vid
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param vid
+ * A new VLAN id
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_vlan_rx_add_vid(uint8_t port_id, uint16_t vid);
+
+/**
+ * Remove VLAN id from Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param vid
+ *   The VLAN id to be removed
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_vlan_rx_kill_vid(uint8_t port_id, uint16_t vid);
+
+/**
+ * Setting the Ethernet device rx mode.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ */
+int rte_ethtool_net_set_rx_mode(uint8_t port_id);
+
+/**
+ * Getting ring parameters for the Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param ring_param
+ *   Pointer to struct ethtool_ringparam to receive parameters.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ * @note
+ * Only the tx_pending and rx_pending fields of struct ethtool_ringparam
+ * are used, and the function only gets parameters for queue 0.
+ */
+int rte_ethtool_get_ringparam(uint8_t port_id,
+ struct ethtool_ringparam *ring_param);
+
+/**
+ * Setting ring parameters for the Ethernet device.
+ *
+ * @param port_id
+ * The port identifier of the Ethernet device.
+ * @param ring_param
+ *   Pointer to struct ethtool_ringparam with parameters to set.
+ * @return
+ * - (0) if successful.
+ * - (-ENOTSUP) if hardware doesn't support.
+ * - (-ENODEV) if *port_id* invalid.
+ *   - others depend on the specific operation's implementation.
+ * @note
+ * Only the tx_pending and rx_pending fields of struct ethtool_ringparam
+ * are used, and the function only sets parameters for queue 0.
+ */
+int rte_ethtool_set_ringparam(uint8_t port_id,
+ struct ethtool_ringparam *ring_param);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_ETHTOOL_H_ */
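
A minimal usage sketch of the shim layer declared above, assuming rte_eal_init() has already run and port 0 has been configured and started elsewhere; error handling is abbreviated and the helper name query_port is only illustrative.

#include <stdio.h>
#include "rte_ethtool.h"

/* Query driver info and link state for one port via the shim layer. */
static void
query_port(uint8_t port_id)
{
	struct ethtool_drvinfo drvinfo;
	int link;

	if (rte_ethtool_get_drvinfo(port_id, &drvinfo) == 0)
		printf("port %u: driver %s, bus %s\n",
		       (unsigned)port_id, drvinfo.driver, drvinfo.bus_info);

	link = rte_ethtool_get_link(port_id);  /* 1 = up, 0 = down, <0 = error */
	if (link >= 0)
		printf("port %u: link %s\n",
		       (unsigned)port_id, link ? "up" : "down");
}
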
diff --git a/examples/exception_path/Makefile b/examples/exception_path/Makefile
new file mode 100644
index 00000000..959914a2
--- /dev/null
+++ b/examples/exception_path/Makefile
@@ -0,0 +1,58 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = exception_path
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/exception_path/main.c b/examples/exception_path/main.c
new file mode 100644
index 00000000..bec98040
--- /dev/null
+++ b/examples/exception_path/main.c
@@ -0,0 +1,571 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <netinet/in.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+#define FATAL_ERROR(fmt, args...) rte_exit(EXIT_FAILURE, fmt "\n", ##args)
+#define PRINT_INFO(fmt, args...) RTE_LOG(INFO, APP, fmt "\n", ##args)
+
+/* Max ports that can be used (each port is associated with two lcores) */
+#define MAX_PORTS (RTE_MAX_LCORE / 2)
+
+/* Max size of a single packet */
+#define MAX_PACKET_SZ (2048)
+
+/* Size of the data buffer in each mbuf */
+#define MBUF_DATA_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM)
+
+/* Number of mbufs in mempool that is created */
+#define NB_MBUF 8192
+
+/* How many packets to attempt to read from NIC in one go */
+#define PKT_BURST_SZ 32
+
+/* How many objects (mbufs) to keep in per-lcore mempool cache */
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+
+/* Number of RX ring descriptors */
+#define NB_RXD 128
+
+/* Number of TX ring descriptors */
+#define NB_TXD 512
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+
+/* Options for configuring ethernet port */
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .header_split = 0, /* Header Split disabled */
+ .hw_ip_checksum = 0, /* IP checksum offload disabled */
+ .hw_vlan_filter = 0, /* VLAN filtering disabled */
+ .jumbo_frame = 0, /* Jumbo Frame Support disabled */
+		.hw_strip_crc = 0, /* CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+/* Mempool for mbufs */
+static struct rte_mempool * pktmbuf_pool = NULL;
+
+/* Mask of enabled ports */
+static uint32_t ports_mask = 0;
+
+/* Mask of cores that read from NIC and write to tap */
+static uint64_t input_cores_mask = 0;
+
+/* Mask of cores that read from tap and write to NIC */
+static uint64_t output_cores_mask = 0;
+
+/* Array storing port_id that is associated with each lcore */
+static uint8_t port_ids[RTE_MAX_LCORE];
+
+/* Structure type for recording lcore-specific stats */
+struct stats {
+ uint64_t rx;
+ uint64_t tx;
+ uint64_t dropped;
+};
+
+/* Array of lcore-specific stats */
+static struct stats lcore_stats[RTE_MAX_LCORE];
+
+/* Print out statistics on packets handled */
+static void
+print_stats(void)
+{
+ unsigned i;
+
+ printf("\n**Exception-Path example application statistics**\n"
+ "======= ====== ============ ============ ===============\n"
+ " Lcore Port RX TX Dropped on TX\n"
+ "------- ------ ------------ ------------ ---------------\n");
+ RTE_LCORE_FOREACH(i) {
+ printf("%6u %7u %13"PRIu64" %13"PRIu64" %16"PRIu64"\n",
+ i, (unsigned)port_ids[i],
+ lcore_stats[i].rx, lcore_stats[i].tx,
+ lcore_stats[i].dropped);
+ }
+ printf("======= ====== ============ ============ ===============\n");
+}
+
+/* Custom handling of signals to handle stats */
+static void
+signal_handler(int signum)
+{
+ /* When we receive a USR1 signal, print stats */
+ if (signum == SIGUSR1) {
+ print_stats();
+ }
+
+ /* When we receive a USR2 signal, reset stats */
+ if (signum == SIGUSR2) {
+ memset(&lcore_stats, 0, sizeof(lcore_stats));
+ printf("\n**Statistics have been reset**\n");
+ return;
+ }
+}
+
+/*
+ * Create a tap network interface, or use existing one with same name.
+ * If name[0]=='\0' then a name is automatically assigned and returned in name.
+ */
+static int tap_create(char *name)
+{
+ struct ifreq ifr;
+ int fd, ret;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0)
+ return fd;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ /* TAP device without packet information */
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+ if (name && *name)
+ snprintf(ifr.ifr_name, IFNAMSIZ, "%s", name);
+
+ ret = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (ret < 0) {
+ close(fd);
+ return ret;
+ }
+
+ if (name)
+ snprintf(name, IFNAMSIZ, "%s", ifr.ifr_name);
+
+ return fd;
+}
+
+/* Main processing loop */
+static int
+main_loop(__attribute__((unused)) void *arg)
+{
+ const unsigned lcore_id = rte_lcore_id();
+ char tap_name[IFNAMSIZ];
+ int tap_fd;
+
+ if ((1ULL << lcore_id) & input_cores_mask) {
+ /* Create new tap interface */
+ snprintf(tap_name, IFNAMSIZ, "tap_dpdk_%.2u", lcore_id);
+ tap_fd = tap_create(tap_name);
+ if (tap_fd < 0)
+ FATAL_ERROR("Could not create tap interface \"%s\" (%d)",
+ tap_name, tap_fd);
+
+ PRINT_INFO("Lcore %u is reading from port %u and writing to %s",
+ lcore_id, (unsigned)port_ids[lcore_id], tap_name);
+ fflush(stdout);
+ /* Loop forever reading from NIC and writing to tap */
+ for (;;) {
+ struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
+ unsigned i;
+ const unsigned nb_rx =
+ rte_eth_rx_burst(port_ids[lcore_id], 0,
+ pkts_burst, PKT_BURST_SZ);
+ lcore_stats[lcore_id].rx += nb_rx;
+ for (i = 0; likely(i < nb_rx); i++) {
+ struct rte_mbuf *m = pkts_burst[i];
+ /* Ignore return val from write() */
+ int ret = write(tap_fd,
+ rte_pktmbuf_mtod(m, void*),
+ rte_pktmbuf_data_len(m));
+ rte_pktmbuf_free(m);
+ if (unlikely(ret < 0))
+ lcore_stats[lcore_id].dropped++;
+ else
+ lcore_stats[lcore_id].tx++;
+ }
+ }
+ }
+ else if ((1ULL << lcore_id) & output_cores_mask) {
+ /* Create new tap interface */
+ snprintf(tap_name, IFNAMSIZ, "tap_dpdk_%.2u", lcore_id);
+ tap_fd = tap_create(tap_name);
+ if (tap_fd < 0)
+ FATAL_ERROR("Could not create tap interface \"%s\" (%d)",
+ tap_name, tap_fd);
+
+ PRINT_INFO("Lcore %u is reading from %s and writing to port %u",
+ lcore_id, tap_name, (unsigned)port_ids[lcore_id]);
+ fflush(stdout);
+ /* Loop forever reading from tap and writing to NIC */
+ for (;;) {
+ int ret;
+ struct rte_mbuf *m = rte_pktmbuf_alloc(pktmbuf_pool);
+ if (m == NULL)
+ continue;
+
+ ret = read(tap_fd, rte_pktmbuf_mtod(m, void *),
+ MAX_PACKET_SZ);
+ lcore_stats[lcore_id].rx++;
+ if (unlikely(ret < 0)) {
+ FATAL_ERROR("Reading from %s interface failed",
+ tap_name);
+ }
+ m->nb_segs = 1;
+ m->next = NULL;
+ m->pkt_len = (uint16_t)ret;
+ m->data_len = (uint16_t)ret;
+ ret = rte_eth_tx_burst(port_ids[lcore_id], 0, &m, 1);
+ if (unlikely(ret < 1)) {
+ rte_pktmbuf_free(m);
+ lcore_stats[lcore_id].dropped++;
+ }
+ else {
+ lcore_stats[lcore_id].tx++;
+ }
+ }
+ }
+ else {
+ PRINT_INFO("Lcore %u has nothing to do", lcore_id);
+ return 0;
+ }
+ /*
+ * Tap file is closed automatically when program exits. Putting close()
+ * here will cause the compiler to give an error about unreachable code.
+ */
+}
+
+/* Display usage instructions */
+static void
+print_usage(const char *prgname)
+{
+ PRINT_INFO("\nUsage: %s [EAL options] -- -p PORTMASK -i IN_CORES -o OUT_CORES\n"
+ " -p PORTMASK: hex bitmask of ports to use\n"
+ " -i IN_CORES: hex bitmask of cores which read from NIC\n"
+ " -o OUT_CORES: hex bitmask of cores which write to NIC",
+ prgname);
+}
+
+/* Convert string to unsigned number; 0 is returned if an error occurs */
+static uint64_t
+parse_unsigned(const char *portmask)
+{
+ char *end = NULL;
+ uint64_t num;
+
+ num = strtoull(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+
+ return (uint64_t)num;
+}
+
+/* Record affinities between ports and lcores in global port_ids[] array */
+static void
+setup_port_lcore_affinities(void)
+{
+ unsigned long i;
+ uint8_t tx_port = 0;
+ uint8_t rx_port = 0;
+
+ /* Setup port_ids[] array, and check masks were ok */
+ RTE_LCORE_FOREACH(i) {
+ if (input_cores_mask & (1ULL << i)) {
+ /* Skip ports that are not enabled */
+ while ((ports_mask & (1 << rx_port)) == 0) {
+ rx_port++;
+ if (rx_port > (sizeof(ports_mask) * 8))
+ goto fail; /* not enough ports */
+ }
+
+ port_ids[i] = rx_port++;
+ }
+ else if (output_cores_mask & (1ULL << i)) {
+ /* Skip ports that are not enabled */
+ while ((ports_mask & (1 << tx_port)) == 0) {
+ tx_port++;
+ if (tx_port > (sizeof(ports_mask) * 8))
+ goto fail; /* not enough ports */
+ }
+
+ port_ids[i] = tx_port++;
+ }
+ }
+
+ if (rx_port != tx_port)
+ goto fail; /* uneven number of cores in masks */
+
+ if (ports_mask & (~((1 << rx_port) - 1)))
+ goto fail; /* unused ports */
+
+ return;
+fail:
+ FATAL_ERROR("Invalid core/port masks specified on command line");
+}
+
+/* Parse the arguments given in the command line of the application */
+static void
+parse_args(int argc, char **argv)
+{
+ int opt;
+ const char *prgname = argv[0];
+
+ /* Disable printing messages within getopt() */
+ opterr = 0;
+
+ /* Parse command line */
+ while ((opt = getopt(argc, argv, "i:o:p:")) != EOF) {
+ switch (opt) {
+ case 'i':
+ input_cores_mask = parse_unsigned(optarg);
+ break;
+ case 'o':
+ output_cores_mask = parse_unsigned(optarg);
+ break;
+ case 'p':
+ ports_mask = parse_unsigned(optarg);
+ break;
+ default:
+ print_usage(prgname);
+ FATAL_ERROR("Invalid option specified");
+ }
+ }
+
+ /* Check that options were parsed ok */
+ if (input_cores_mask == 0) {
+ print_usage(prgname);
+ FATAL_ERROR("IN_CORES not specified correctly");
+ }
+ if (output_cores_mask == 0) {
+ print_usage(prgname);
+ FATAL_ERROR("OUT_CORES not specified correctly");
+ }
+ if (ports_mask == 0) {
+ print_usage(prgname);
+ FATAL_ERROR("PORTMASK not specified correctly");
+ }
+
+ setup_port_lcore_affinities();
+}
+
+/* Initialise a single port on an Ethernet device */
+static void
+init_port(uint8_t port)
+{
+ int ret;
+
+ /* Initialise device and RX/TX queues */
+ PRINT_INFO("Initialising port %u ...", (unsigned)port);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
+ if (ret < 0)
+ FATAL_ERROR("Could not configure port%u (%d)",
+ (unsigned)port, ret);
+
+ ret = rte_eth_rx_queue_setup(port, 0, NB_RXD, rte_eth_dev_socket_id(port),
+ NULL,
+ pktmbuf_pool);
+ if (ret < 0)
+		FATAL_ERROR("Could not set up RX queue for port%u (%d)",
+ (unsigned)port, ret);
+
+ ret = rte_eth_tx_queue_setup(port, 0, NB_TXD, rte_eth_dev_socket_id(port),
+ NULL);
+ if (ret < 0)
+		FATAL_ERROR("Could not set up TX queue for port%u (%d)",
+ (unsigned)port, ret);
+
+ ret = rte_eth_dev_start(port);
+ if (ret < 0)
+ FATAL_ERROR("Could not start port%u (%d)", (unsigned)port, ret);
+
+ rte_eth_promiscuous_enable(port);
+}
+
+/* Check the link status of all ports for up to 9s, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+					("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/* Initialise ports/queues etc. and start main loop on each core */
+int
+main(int argc, char** argv)
+{
+ int ret;
+ unsigned i,high_port;
+ uint8_t nb_sys_ports, port;
+
+	/* Associate signal_handler function with USR signals */
+ signal(SIGUSR1, signal_handler);
+ signal(SIGUSR2, signal_handler);
+
+ /* Initialise EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ FATAL_ERROR("Could not initialise EAL (%d)", ret);
+ argc -= ret;
+ argv += ret;
+
+ /* Parse application arguments (after the EAL ones) */
+ parse_args(argc, argv);
+
+ /* Create the mbuf pool */
+ pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
+ MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
+ if (pktmbuf_pool == NULL) {
+ FATAL_ERROR("Could not initialise mbuf pool");
+ return -1;
+ }
+
+ /* Get number of ports found in scan */
+ nb_sys_ports = rte_eth_dev_count();
+ if (nb_sys_ports == 0)
+ FATAL_ERROR("No supported Ethernet device found");
+ /* Find highest port set in portmask */
+ for (high_port = (sizeof(ports_mask) * 8) - 1;
+ (high_port != 0) && !(ports_mask & (1 << high_port));
+ high_port--)
+ ; /* empty body */
+ if (high_port > nb_sys_ports)
+ FATAL_ERROR("Port mask requires more ports than available");
+
+ /* Initialise each port */
+ for (port = 0; port < nb_sys_ports; port++) {
+ /* Skip ports that are not enabled */
+ if ((ports_mask & (1 << port)) == 0) {
+ continue;
+ }
+ init_port(port);
+ }
+ check_all_ports_link_status(nb_sys_ports, ports_mask);
+
+ /* Launch per-lcore function on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ if (rte_eal_wait_lcore(i) < 0)
+ return -1;
+ }
+
+ return 0;
+}
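
For context, a self-contained sketch of the TUN/TAP handling that tap_create() above depends on: open /dev/net/tun, request a TAP interface without the packet-information header, and read one Ethernet frame. The interface name tap_demo is an arbitrary illustration, and running this typically requires CAP_NET_ADMIN.

#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/if.h>
#include <linux/if_tun.h>

int main(void)
{
	struct ifreq ifr;
	char frame[2048];
	int fd, n;

	fd = open("/dev/net/tun", O_RDWR);
	if (fd < 0)
		return 1;

	memset(&ifr, 0, sizeof(ifr));
	ifr.ifr_flags = IFF_TAP | IFF_NO_PI;    /* raw frames, no extra header */
	snprintf(ifr.ifr_name, IFNAMSIZ, "tap_demo");

	if (ioctl(fd, TUNSETIFF, &ifr) < 0) {   /* create or attach to the tap */
		close(fd);
		return 1;
	}

	n = read(fd, frame, sizeof(frame));     /* blocks until a frame arrives */
	printf("read %d bytes from %s\n", n, ifr.ifr_name);
	close(fd);
	return 0;
}
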
diff --git a/examples/helloworld/Makefile b/examples/helloworld/Makefile
new file mode 100644
index 00000000..d2cca7a7
--- /dev/null
+++ b/examples/helloworld/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = helloworld
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/helloworld/main.c b/examples/helloworld/main.c
new file mode 100644
index 00000000..8b7a2de0
--- /dev/null
+++ b/examples/helloworld/main.c
@@ -0,0 +1,77 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+
+static int
+lcore_hello(__attribute__((unused)) void *arg)
+{
+ unsigned lcore_id;
+ lcore_id = rte_lcore_id();
+ printf("hello from core %u\n", lcore_id);
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned lcore_id;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ /* call lcore_hello() on every slave lcore */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(lcore_hello, NULL, lcore_id);
+ }
+
+ /* call it on master lcore too */
+ lcore_hello(NULL);
+
+ rte_eal_mp_wait_lcore();
+ return 0;
+}
diff --git a/examples/ip_fragmentation/Makefile b/examples/ip_fragmentation/Makefile
new file mode 100644
index 00000000..c321e6a1
--- /dev/null
+++ b/examples/ip_fragmentation/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ip_fragmentation
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ip_fragmentation/main.c b/examples/ip_fragmentation/main.c
new file mode 100644
index 00000000..81a49187
--- /dev/null
+++ b/examples/ip_fragmentation/main.c
@@ -0,0 +1,965 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/param.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+#include <rte_ip.h>
+#include <rte_string_fns.h>
+
+#include <rte_ip_frag.h>
+
+#define RTE_LOGTYPE_IP_FRAG RTE_LOGTYPE_USER1
+
+/* allow max jumbo frame 9.5 KB */
+#define JUMBO_FRAME_MAX_SIZE 0x2600
+
+#define ROUNDUP_DIV(a, b) (((a) + (b) - 1) / (b))
+
+/*
+ * Default byte size for the IPv4/IPv6 Maximum Transmission Unit (MTU).
+ * This value includes the size of the IP header.
+ */
+#define IPV4_MTU_DEFAULT ETHER_MTU
+#define IPV6_MTU_DEFAULT ETHER_MTU
+
+/*
+ * Default payload in bytes for IPv4/IPv6 packets.
+ */
+#define IPV4_DEFAULT_PAYLOAD (IPV4_MTU_DEFAULT - sizeof(struct ipv4_hdr))
+#define IPV6_DEFAULT_PAYLOAD (IPV6_MTU_DEFAULT - sizeof(struct ipv6_hdr))
+
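/*
 * Illustrative arithmetic (not part of the patch), assuming ETHER_MTU is
 * 1500 and the IPv4 header is 20 bytes as defined in rte_ether.h/rte_ip.h:
 * IPV4_DEFAULT_PAYLOAD = 1500 - 20 = 1480 bytes, which is a multiple of 8
 * as required by the IPv4 fragment-offset field, so a 9000-byte payload
 * is split into ROUNDUP_DIV(9000, 1480) = 7 fragments.
 */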
+/*
+ * Max number of fragments per packet expected - defined by config file.
+ */
+#define MAX_PACKET_FRAG RTE_LIBRTE_IP_FRAG_MAX_FRAG
+
+#define NB_MBUF 8192
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* Configure how many packets ahead to prefetch when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+#ifndef IPv4_BYTES
+#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
+#define IPv4_BYTES(addr) \
+ (uint8_t) (((addr) >> 24) & 0xFF),\
+ (uint8_t) (((addr) >> 16) & 0xFF),\
+ (uint8_t) (((addr) >> 8) & 0xFF),\
+ (uint8_t) ((addr) & 0xFF)
+#endif
+
+#ifndef IPv6_BYTES
+#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
+ "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+#define IPv6_BYTES(addr) \
+ addr[0], addr[1], addr[2], addr[3], \
+ addr[4], addr[5], addr[6], addr[7], \
+ addr[8], addr[9], addr[10], addr[11],\
+	addr[12], addr[13], addr[14], addr[15]
+#endif
+
+#define IPV6_ADDR_LEN 16
+
+/* mask of enabled ports */
+static int enabled_port_mask = 0;
+
+static int rx_queue_per_lcore = 1;
+
+#define MBUF_TABLE_SIZE (2 * MAX(MAX_PKT_BURST, MAX_PACKET_FRAG))
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MBUF_TABLE_SIZE];
+};
+
+struct rx_queue {
+ struct rte_mempool *direct_pool;
+ struct rte_mempool *indirect_pool;
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ uint8_t portid;
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ uint16_t n_rx_queue;
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 1, /**< Jumbo Frame Support enabled */
+		.hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+/*
+ * IPv4 forwarding table
+ */
+struct l3fwd_ipv4_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
+ {IPv4(100,10,0,0), 16, 0},
+ {IPv4(100,20,0,0), 16, 1},
+ {IPv4(100,30,0,0), 16, 2},
+ {IPv4(100,40,0,0), 16, 3},
+ {IPv4(100,50,0,0), 16, 4},
+ {IPv4(100,60,0,0), 16, 5},
+ {IPv4(100,70,0,0), 16, 6},
+ {IPv4(100,80,0,0), 16, 7},
+};
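/*
 * Illustrative lookup (not part of the patch): with the table above, a
 * packet destined to 100.30.4.7 matches the 100.30.0.0/16 entry, so the
 * LPM lookup returns next hop 2; it is only used as the output port when
 * bit 2 of the -p portmask is set, i.e. (enabled_port_mask & (1 << 2)) != 0.
 */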
+
+/*
+ * IPv6 forwarding table
+ */
+
+struct l3fwd_ipv6_route {
+ uint8_t ip[IPV6_ADDR_LEN];
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
+ {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
+ {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
+ {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
+ {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
+ {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
+ {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
+ {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
+ {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
+};
+
+#define LPM_MAX_RULES 1024
+#define LPM6_MAX_RULES 1024
+#define LPM6_NUMBER_TBL8S (1 << 16)
+
+struct rte_lpm6_config lpm6_config = {
+ .max_rules = LPM6_MAX_RULES,
+ .number_tbl8s = LPM6_NUMBER_TBL8S,
+ .flags = 0
+};
+
+static struct rte_mempool *socket_direct_pool[RTE_MAX_NUMA_NODES];
+static struct rte_mempool *socket_indirect_pool[RTE_MAX_NUMA_NODES];
+static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
+static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];
+
+/* Send burst of packets on an output interface */
+static inline int
+send_burst(struct lcore_queue_conf *qconf, uint16_t n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ int ret;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+static inline void
+l3fwd_simple_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf,
+ uint8_t queueid, uint8_t port_in)
+{
+ struct rx_queue *rxq;
+ uint32_t i, len, next_hop_ipv4;
+ uint8_t next_hop_ipv6, port_out, ipv6;
+ int32_t len2;
+
+ ipv6 = 0;
+ rxq = &qconf->rx_queue_list[queueid];
+
+ /* by default, send everything back to the source port */
+ port_out = port_in;
+
+ /* Remove the Ethernet header and trailer from the input packet */
+ rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));
+
+ /* Build transmission burst */
+ len = qconf->tx_mbufs[port_out].len;
+
+ /* if this is an IPv4 packet */
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ struct ipv4_hdr *ip_hdr;
+ uint32_t ip_dst;
+ /* Read the lookup key (i.e. ip_dst) from the input packet */
+ ip_hdr = rte_pktmbuf_mtod(m, struct ipv4_hdr *);
+ ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
+
+ /* Find destination port */
+ if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop_ipv4) == 0 &&
+ (enabled_port_mask & 1 << next_hop_ipv4) != 0) {
+ port_out = next_hop_ipv4;
+
+ /* Build transmission burst for new port */
+ len = qconf->tx_mbufs[port_out].len;
+ }
+
+ /* if we don't need to do any fragmentation */
+ if (likely (IPV4_MTU_DEFAULT >= m->pkt_len)) {
+ qconf->tx_mbufs[port_out].m_table[len] = m;
+ len2 = 1;
+ } else {
+ len2 = rte_ipv4_fragment_packet(m,
+ &qconf->tx_mbufs[port_out].m_table[len],
+ (uint16_t)(MBUF_TABLE_SIZE - len),
+ IPV4_MTU_DEFAULT,
+ rxq->direct_pool, rxq->indirect_pool);
+
+ /* Free input packet */
+ rte_pktmbuf_free(m);
+
+ /* If we fail to fragment the packet */
+ if (unlikely (len2 < 0))
+ return;
+ }
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ /* if this is an IPv6 packet */
+ struct ipv6_hdr *ip_hdr;
+
+ ipv6 = 1;
+
+ /* Read the lookup key (i.e. ip_dst) from the input packet */
+ ip_hdr = rte_pktmbuf_mtod(m, struct ipv6_hdr *);
+
+ /* Find destination port */
+ if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, &next_hop_ipv6) == 0 &&
+ (enabled_port_mask & 1 << next_hop_ipv6) != 0) {
+ port_out = next_hop_ipv6;
+
+ /* Build transmission burst for new port */
+ len = qconf->tx_mbufs[port_out].len;
+ }
+
+ /* if we don't need to do any fragmentation */
+ if (likely (IPV6_MTU_DEFAULT >= m->pkt_len)) {
+ qconf->tx_mbufs[port_out].m_table[len] = m;
+ len2 = 1;
+ } else {
+ len2 = rte_ipv6_fragment_packet(m,
+ &qconf->tx_mbufs[port_out].m_table[len],
+ (uint16_t)(MBUF_TABLE_SIZE - len),
+ IPV6_MTU_DEFAULT,
+ rxq->direct_pool, rxq->indirect_pool);
+
+ /* Free input packet */
+ rte_pktmbuf_free(m);
+
+ /* If we fail to fragment the packet */
+ if (unlikely (len2 < 0))
+ return;
+ }
+ }
+ /* else, just forward the packet */
+ else {
+ qconf->tx_mbufs[port_out].m_table[len] = m;
+ len2 = 1;
+ }
+
+ for (i = len; i < len + len2; i ++) {
+ void *d_addr_bytes;
+
+ m = qconf->tx_mbufs[port_out].m_table[i];
+ struct ether_hdr *eth_hdr = (struct ether_hdr *)
+ rte_pktmbuf_prepend(m, (uint16_t)sizeof(struct ether_hdr));
+ if (eth_hdr == NULL) {
+ rte_panic("No headroom in mbuf.\n");
+ }
+
+ m->l2_len = sizeof(struct ether_hdr);
+
+ /* 02:00:00:00:00:xx */
+ d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)port_out << 40);
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[port_out], &eth_hdr->s_addr);
+ if (ipv6)
+ eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6);
+ else
+ eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
+ }
+
+ len += len2;
+
+ if (likely(len < MAX_PKT_BURST)) {
+ qconf->tx_mbufs[port_out].len = (uint16_t)len;
+ return;
+ }
+
+ /* Transmit packets */
+ send_burst(qconf, (uint16_t)len, port_out);
+ qconf->tx_mbufs[port_out].len = 0;
+}
+
+/* main processing loop */
+static int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int i, j, nb_rx;
+ uint8_t portid;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, IP_FRAG, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, IP_FRAG, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].portid;
+ RTE_LOG(INFO, IP_FRAG, " -- lcoreid=%u portid=%d\n", lcore_id,
+ (int) portid);
+ }
+
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ /*
+ * This could be optimized (use queueid instead of
+ * portid), but it is not called so often
+ */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (qconf->tx_mbufs[portid].len == 0)
+ continue;
+ send_burst(&lcore_queue_conf[lcore_id],
+ qconf->tx_mbufs[portid].len,
+ portid);
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].portid;
+ nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
+ MAX_PKT_BURST);
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_burst[j], void *));
+ }
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++) {
+ l3fwd_simple_forward(pkts_burst[j], qconf, i, portid);
+ }
+ }
+ }
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+	       "  -q NQ: number of queues (=ports) per lcore (default is 1)\n",
+ prgname);
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n == 0)
+ return -1;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask < 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ rx_queue_per_lcore = parse_nqueue(optarg);
+ if (rx_queue_per_lcore < 0) {
+ printf("invalid queue number\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ print_usage(prgname);
+ return -1;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (enabled_port_mask == 0) {
+ printf("portmask not specified\n");
+ print_usage(prgname);
+ return -1;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+/* Check the link status of all ports for up to 9 seconds, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("\ndone\n");
+ }
+ }
+}
+
+static int
+init_routing_table(void)
+{
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ int socket, ret;
+ unsigned i;
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ if (socket_lpm[socket]) {
+ lpm = socket_lpm[socket];
+ /* populate the LPM table */
+ for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
+ ret = rte_lpm_add(lpm,
+ l3fwd_ipv4_route_array[i].ip,
+ l3fwd_ipv4_route_array[i].depth,
+ l3fwd_ipv4_route_array[i].if_out);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
+ "LPM table\n", i);
+ return -1;
+ }
+
+ RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv4_BYTES_FMT
+ "/%d (port %d)\n",
+ socket,
+ IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
+ l3fwd_ipv4_route_array[i].depth,
+ l3fwd_ipv4_route_array[i].if_out);
+ }
+ }
+
+ if (socket_lpm6[socket]) {
+ lpm6 = socket_lpm6[socket];
+ /* populate the LPM6 table */
+ for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
+ ret = rte_lpm6_add(lpm6,
+ l3fwd_ipv6_route_array[i].ip,
+ l3fwd_ipv6_route_array[i].depth,
+ l3fwd_ipv6_route_array[i].if_out);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, IP_FRAG, "Unable to add entry %i to the l3fwd "
+ "LPM6 table\n", i);
+ return -1;
+ }
+
+ RTE_LOG(INFO, IP_FRAG, "Socket %i: adding route " IPv6_BYTES_FMT
+ "/%d (port %d)\n",
+ socket,
+ IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
+ l3fwd_ipv6_route_array[i].depth,
+ l3fwd_ipv6_route_array[i].if_out);
+ }
+ }
+ }
+ return 0;
+}
+
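+/*
+ * Per-socket resources: a direct mbuf pool (full-size buffers that hold the
+ * received packet data), an indirect mbuf pool (buffers with no data room,
+ * attached to direct mbufs by the fragmentation library), and the IPv4/IPv6
+ * LPM tables used for the routing lookup.
+ */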
+static int
+init_mem(void)
+{
+ char buf[PATH_MAX];
+ struct rte_mempool *mp;
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ struct rte_lpm_config lpm_config;
+ int socket;
+ unsigned lcore_id;
+
+ /* traverse through lcores and initialize structures on each socket */
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ socket = rte_lcore_to_socket_id(lcore_id);
+
+ if (socket == SOCKET_ID_ANY)
+ socket = 0;
+
+ if (socket_direct_pool[socket] == NULL) {
+ RTE_LOG(INFO, IP_FRAG, "Creating direct mempool on socket %i\n",
+ socket);
+ snprintf(buf, sizeof(buf), "pool_direct_%i", socket);
+
+ mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, socket);
+ if (mp == NULL) {
+ RTE_LOG(ERR, IP_FRAG, "Cannot create direct mempool\n");
+ return -1;
+ }
+ socket_direct_pool[socket] = mp;
+ }
+
+ if (socket_indirect_pool[socket] == NULL) {
+ RTE_LOG(INFO, IP_FRAG, "Creating indirect mempool on socket %i\n",
+ socket);
+ snprintf(buf, sizeof(buf), "pool_indirect_%i", socket);
+
+ mp = rte_pktmbuf_pool_create(buf, NB_MBUF, 32, 0, 0,
+ socket);
+ if (mp == NULL) {
+ RTE_LOG(ERR, IP_FRAG, "Cannot create indirect mempool\n");
+ return -1;
+ }
+ socket_indirect_pool[socket] = mp;
+ }
+
+ if (socket_lpm[socket] == NULL) {
+ RTE_LOG(INFO, IP_FRAG, "Creating LPM table on socket %i\n", socket);
+ snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);
+
+ lpm_config.max_rules = LPM_MAX_RULES;
+ lpm_config.number_tbl8s = 256;
+ lpm_config.flags = 0;
+
+ lpm = rte_lpm_create(buf, socket, &lpm_config);
+ if (lpm == NULL) {
+ RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
+ return -1;
+ }
+ socket_lpm[socket] = lpm;
+ }
+
+ if (socket_lpm6[socket] == NULL) {
+ RTE_LOG(INFO, IP_FRAG, "Creating LPM6 table on socket %i\n", socket);
+ snprintf(buf, sizeof(buf), "IP_FRAG_LPM_%i", socket);
+
+ lpm6 = rte_lpm6_create("IP_FRAG_LPM6", socket, &lpm6_config);
+ if (lpm6 == NULL) {
+ RTE_LOG(ERR, IP_FRAG, "Cannot create LPM table\n");
+ return -1;
+ }
+ socket_lpm6[socket] = lpm6;
+ }
+ }
+
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ struct rx_queue *rxq;
+ int socket, ret;
+ unsigned nb_ports;
+ uint16_t queueid = 0;
+ unsigned lcore_id = 0, rx_lcore_id = 0;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eal_init failed");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid arguments");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+ else if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No ports found!\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize structures (mempools, lpm etc.) */
+ if (init_mem() < 0)
+ rte_panic("Cannot initialize memory structures!\n");
+
+ /* check if portmask has non-existent ports */
+ if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
+ rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %d\n", portid);
+ continue;
+ }
+
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
+
+ rx_lcore_id ++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+
+ qconf = &lcore_queue_conf[rx_lcore_id];
+ }
+
+ socket = (int) rte_lcore_to_socket_id(rx_lcore_id);
+ if (socket == SOCKET_ID_ANY)
+ socket = 0;
+
+ rxq = &qconf->rx_queue_list[qconf->n_rx_queue];
+ rxq->portid = portid;
+ rxq->direct_pool = socket_direct_pool[socket];
+ rxq->indirect_pool = socket_indirect_pool[socket];
+ rxq->lpm = socket_lpm[socket];
+ rxq->lpm6 = socket_lpm6[socket];
+ qconf->n_rx_queue++;
+
+ /* init port */
+ printf("Initializing port %d on lcore %u...", portid,
+ rx_lcore_id);
+ fflush(stdout);
+
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
+ &port_conf);
+ if (ret < 0) {
+ printf("\n");
+ rte_exit(EXIT_FAILURE, "Cannot configure device: "
+ "err=%d, port=%d\n",
+ ret, portid);
+ }
+
+ /* init one RX queue */
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ socket, NULL,
+ socket_direct_pool[socket]);
+ if (ret < 0) {
+ printf("\n");
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
+ "err=%d, port=%d\n",
+ ret, portid);
+ }
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf("\n");
+
+		/* init one TX queue per (lcore, port) pair */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ socket = (int) rte_lcore_to_socket_id(lcore_id);
+ printf("txq=%u,%d ", lcore_id, queueid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socket, txconf);
+ if (ret < 0) {
+ printf("\n");
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
+ "err=%d, port=%d\n", ret, portid);
+ }
+
+ qconf = &lcore_queue_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+ }
+
+ printf("\n");
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ continue;
+ }
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ if (init_routing_table() < 0)
+ rte_exit(EXIT_FAILURE, "Cannot init routing table\n");
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/Makefile b/examples/ip_pipeline/Makefile
new file mode 100644
index 00000000..10fe1ba9
--- /dev/null
+++ b/examples/ip_pipeline/Makefile
@@ -0,0 +1,79 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ip_pipeline
+
+VPATH += $(SRCDIR)/pipeline
+
+INC += $(wildcard *.h) $(wildcard pipeline/*.h)
+
+# all source are stored in SRCS-y
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := main.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_parse.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_parse_tm.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += config_check.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += init.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += thread.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += thread_fe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += cpu_core_map.c
+
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_common_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_common_fe.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_master_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_master.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_passthrough_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_passthrough.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_firewall_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_firewall.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_classification_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_classification.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_actions_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_flow_actions.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_routing_be.c
+SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) += pipeline_routing.c
+
+CFLAGS += -I$(SRCDIR) -I$(SRCDIR)/pipeline
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -Wno-error=unused-function -Wno-error=unused-variable
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ip_pipeline/app.h b/examples/ip_pipeline/app.h
new file mode 100644
index 00000000..55a98417
--- /dev/null
+++ b/examples/ip_pipeline/app.h
@@ -0,0 +1,949 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_APP_H__
+#define __INCLUDE_APP_H__
+
+#include <stdint.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+#include <rte_sched.h>
+#include <cmdline_parse.h>
+
+#include <rte_ethdev.h>
+
+#include "cpu_core_map.h"
+#include "pipeline.h"
+
+#define APP_PARAM_NAME_SIZE PIPELINE_NAME_SIZE
+#define APP_LINK_PCI_BDF_SIZE 16
+struct app_mempool_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t buffer_size;
+ uint32_t pool_size;
+ uint32_t cache_size;
+ uint32_t cpu_socket_id;
+};
+
+struct app_link_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t pmd_id; /* Generated based on port mask */
+ uint32_t arp_q; /* 0 = Disabled (packets go to default queue 0) */
+ uint32_t tcp_syn_q; /* 0 = Disabled (pkts go to default queue) */
+ uint32_t ip_local_q; /* 0 = Disabled (pkts go to default queue 0) */
+ uint32_t tcp_local_q; /* 0 = Disabled (pkts go to default queue 0) */
+ uint32_t udp_local_q; /* 0 = Disabled (pkts go to default queue 0) */
+ uint32_t sctp_local_q; /* 0 = Disabled (pkts go to default queue 0) */
+ uint32_t state; /* DOWN = 0, UP = 1 */
+ uint32_t ip; /* 0 = Invalid */
+ uint32_t depth; /* Valid only when IP is valid */
+ uint64_t mac_addr; /* Read from HW */
+ char pci_bdf[APP_LINK_PCI_BDF_SIZE];
+
+ struct rte_eth_conf conf;
+ uint8_t promisc;
+};
+
+struct app_pktq_hwq_in_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t mempool_id; /* Position in the app->mempool_params */
+ uint32_t size;
+ uint32_t burst;
+
+ struct rte_eth_rxconf conf;
+};
+
+struct app_pktq_hwq_out_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t size;
+ uint32_t burst;
+ uint32_t dropless;
+ uint64_t n_retries;
+ struct rte_eth_txconf conf;
+};
+
+struct app_pktq_swq_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t size;
+ uint32_t burst_read;
+ uint32_t burst_write;
+ uint32_t dropless;
+ uint64_t n_retries;
+ uint32_t cpu_socket_id;
+ uint32_t ipv4_frag;
+ uint32_t ipv6_frag;
+ uint32_t ipv4_ras;
+ uint32_t ipv6_ras;
+ uint32_t mtu;
+ uint32_t metadata_size;
+ uint32_t mempool_direct_id;
+ uint32_t mempool_indirect_id;
+};
+
+#ifndef APP_FILE_NAME_SIZE
+#define APP_FILE_NAME_SIZE 256
+#endif
+
+#ifndef APP_MAX_SCHED_SUBPORTS
+#define APP_MAX_SCHED_SUBPORTS 8
+#endif
+
+#ifndef APP_MAX_SCHED_PIPES
+#define APP_MAX_SCHED_PIPES 4096
+#endif
+
+struct app_pktq_tm_params {
+ char *name;
+ uint32_t parsed;
+ const char *file_name;
+ struct rte_sched_port_params sched_port_params;
+ struct rte_sched_subport_params
+ sched_subport_params[APP_MAX_SCHED_SUBPORTS];
+ struct rte_sched_pipe_params
+ sched_pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT];
+ int sched_pipe_to_profile[APP_MAX_SCHED_SUBPORTS * APP_MAX_SCHED_PIPES];
+ uint32_t burst_read;
+ uint32_t burst_write;
+};
+
+struct app_pktq_source_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t mempool_id; /* Position in the app->mempool_params array */
+ uint32_t burst;
+ char *file_name; /* Full path of PCAP file to be copied to mbufs */
+ uint32_t n_bytes_per_pkt;
+};
+
+struct app_pktq_sink_params {
+ char *name;
+ uint8_t parsed;
+	char *file_name; /* Full path of PCAP file where packets are dumped */
+ uint32_t n_pkts_to_dump;
+};
+
+struct app_msgq_params {
+ char *name;
+ uint32_t parsed;
+ uint32_t size;
+ uint32_t cpu_socket_id;
+};
+
+enum app_pktq_in_type {
+ APP_PKTQ_IN_HWQ,
+ APP_PKTQ_IN_SWQ,
+ APP_PKTQ_IN_TM,
+ APP_PKTQ_IN_SOURCE,
+};
+
+struct app_pktq_in_params {
+ enum app_pktq_in_type type;
+ uint32_t id; /* Position in the appropriate app array */
+};
+
+enum app_pktq_out_type {
+ APP_PKTQ_OUT_HWQ,
+ APP_PKTQ_OUT_SWQ,
+ APP_PKTQ_OUT_TM,
+ APP_PKTQ_OUT_SINK,
+};
+
+struct app_pktq_out_params {
+ enum app_pktq_out_type type;
+ uint32_t id; /* Position in the appropriate app array */
+};
+
+#ifndef APP_PIPELINE_TYPE_SIZE
+#define APP_PIPELINE_TYPE_SIZE 64
+#endif
+
+#define APP_MAX_PIPELINE_PKTQ_IN PIPELINE_MAX_PORT_IN
+#define APP_MAX_PIPELINE_PKTQ_OUT PIPELINE_MAX_PORT_OUT
+#define APP_MAX_PIPELINE_MSGQ_IN PIPELINE_MAX_MSGQ_IN
+#define APP_MAX_PIPELINE_MSGQ_OUT PIPELINE_MAX_MSGQ_OUT
+
+#define APP_MAX_PIPELINE_ARGS PIPELINE_MAX_ARGS
+
+struct app_pipeline_params {
+ char *name;
+ uint8_t parsed;
+
+ char type[APP_PIPELINE_TYPE_SIZE];
+
+ uint32_t socket_id;
+ uint32_t core_id;
+ uint32_t hyper_th_id;
+
+ struct app_pktq_in_params pktq_in[APP_MAX_PIPELINE_PKTQ_IN];
+ struct app_pktq_out_params pktq_out[APP_MAX_PIPELINE_PKTQ_OUT];
+ uint32_t msgq_in[APP_MAX_PIPELINE_MSGQ_IN];
+ uint32_t msgq_out[APP_MAX_PIPELINE_MSGQ_OUT];
+
+ uint32_t n_pktq_in;
+ uint32_t n_pktq_out;
+ uint32_t n_msgq_in;
+ uint32_t n_msgq_out;
+
+ uint32_t timer_period;
+
+ char *args_name[APP_MAX_PIPELINE_ARGS];
+ char *args_value[APP_MAX_PIPELINE_ARGS];
+ uint32_t n_args;
+};
+
+struct app_pipeline_data {
+ void *be;
+ void *fe;
+ struct pipeline_type *ptype;
+ uint64_t timer_period;
+ uint32_t enabled;
+};
+
+struct app_thread_pipeline_data {
+ uint32_t pipeline_id;
+ void *be;
+ pipeline_be_op_run f_run;
+ pipeline_be_op_timer f_timer;
+ uint64_t timer_period;
+ uint64_t deadline;
+};
+
+#ifndef APP_MAX_THREAD_PIPELINES
+#define APP_MAX_THREAD_PIPELINES 16
+#endif
+
+#ifndef APP_THREAD_TIMER_PERIOD
+#define APP_THREAD_TIMER_PERIOD 1
+#endif
+
+struct app_thread_data {
+ struct app_thread_pipeline_data regular[APP_MAX_THREAD_PIPELINES];
+ struct app_thread_pipeline_data custom[APP_MAX_THREAD_PIPELINES];
+
+ uint32_t n_regular;
+ uint32_t n_custom;
+
+ uint64_t timer_period;
+ uint64_t thread_req_deadline;
+
+ uint64_t deadline;
+
+ struct rte_ring *msgq_in;
+ struct rte_ring *msgq_out;
+
+ uint64_t headroom_time;
+ uint64_t headroom_cycles;
+ double headroom_ratio;
+};
+
+#ifndef APP_MAX_LINKS
+#define APP_MAX_LINKS 16
+#endif
+
+struct app_eal_params {
+ /* Map lcore set to physical cpu set */
+ char *coremap;
+
+ /* Core ID that is used as master */
+ uint32_t master_lcore_present;
+ uint32_t master_lcore;
+
+ /* Number of memory channels */
+ uint32_t channels_present;
+ uint32_t channels;
+
+ /* Memory to allocate (see also --socket-mem) */
+ uint32_t memory_present;
+ uint32_t memory;
+
+ /* Force number of memory ranks (don't detect) */
+ uint32_t ranks_present;
+ uint32_t ranks;
+
+	/* Add a PCI device to the black list. */
+ char *pci_blacklist[APP_MAX_LINKS];
+
+	/* Add a PCI device to the white list. */
+ char *pci_whitelist[APP_MAX_LINKS];
+
+ /* Add a virtual device. */
+ char *vdev[APP_MAX_LINKS];
+
+ /* Use VMware TSC map instead of native RDTSC */
+ uint32_t vmware_tsc_map_present;
+ int vmware_tsc_map;
+
+ /* Type of this process (primary|secondary|auto) */
+ char *proc_type;
+
+ /* Set syslog facility */
+ char *syslog;
+
+ /* Set default log level */
+ uint32_t log_level_present;
+ uint32_t log_level;
+
+ /* Display version information on startup */
+ uint32_t version_present;
+ int version;
+
+ /* This help */
+ uint32_t help_present;
+ int help;
+
+ /* Use malloc instead of hugetlbfs */
+ uint32_t no_huge_present;
+ int no_huge;
+
+ /* Disable PCI */
+ uint32_t no_pci_present;
+ int no_pci;
+
+ /* Disable HPET */
+ uint32_t no_hpet_present;
+ int no_hpet;
+
+ /* No shared config (mmap'd files) */
+ uint32_t no_shconf_present;
+ int no_shconf;
+
+ /* Add driver */
+ char *add_driver;
+
+ /* Memory to allocate on sockets (comma separated values)*/
+ char *socket_mem;
+
+ /* Directory where hugetlbfs is mounted */
+ char *huge_dir;
+
+ /* Prefix for hugepage filenames */
+ char *file_prefix;
+
+ /* Base virtual address */
+ char *base_virtaddr;
+
+ /* Create /dev/uioX (usually done by hotplug) */
+ uint32_t create_uio_dev_present;
+ int create_uio_dev;
+
+ /* Interrupt mode for VFIO (legacy|msi|msix) */
+ char *vfio_intr;
+
+ /* Support running on Xen dom0 without hugetlbfs */
+ uint32_t xen_dom0_present;
+ int xen_dom0;
+};
+
+#ifndef APP_APPNAME_SIZE
+#define APP_APPNAME_SIZE 256
+#endif
+
+#ifndef APP_MAX_MEMPOOLS
+#define APP_MAX_MEMPOOLS 8
+#endif
+
+#ifndef APP_LINK_MAX_HWQ_IN
+#define APP_LINK_MAX_HWQ_IN 64
+#endif
+
+#ifndef APP_LINK_MAX_HWQ_OUT
+#define APP_LINK_MAX_HWQ_OUT 64
+#endif
+
+#define APP_MAX_HWQ_IN (APP_MAX_LINKS * APP_LINK_MAX_HWQ_IN)
+
+#define APP_MAX_HWQ_OUT (APP_MAX_LINKS * APP_LINK_MAX_HWQ_OUT)
+
+#ifndef APP_MAX_PKTQ_SWQ
+#define APP_MAX_PKTQ_SWQ 256
+#endif
+
+#define APP_MAX_PKTQ_TM APP_MAX_LINKS
+
+#ifndef APP_MAX_PKTQ_SOURCE
+#define APP_MAX_PKTQ_SOURCE 16
+#endif
+
+#ifndef APP_MAX_PKTQ_SINK
+#define APP_MAX_PKTQ_SINK 16
+#endif
+
+#ifndef APP_MAX_MSGQ
+#define APP_MAX_MSGQ 64
+#endif
+
+#ifndef APP_MAX_PIPELINES
+#define APP_MAX_PIPELINES 64
+#endif
+
+#ifndef APP_EAL_ARGC
+#define APP_EAL_ARGC 32
+#endif
+
+#ifndef APP_MAX_PIPELINE_TYPES
+#define APP_MAX_PIPELINE_TYPES 64
+#endif
+
+#ifndef APP_MAX_THREADS
+#define APP_MAX_THREADS RTE_MAX_LCORE
+#endif
+
+#ifndef APP_MAX_CMDS
+#define APP_MAX_CMDS 64
+#endif
+
+#ifndef APP_THREAD_HEADROOM_STATS_COLLECT
+#define APP_THREAD_HEADROOM_STATS_COLLECT 1
+#endif
+
+struct app_params {
+ /* Config */
+ char app_name[APP_APPNAME_SIZE];
+ const char *config_file;
+ const char *script_file;
+ const char *parser_file;
+ const char *output_file;
+ const char *preproc;
+ const char *preproc_args;
+ uint64_t port_mask;
+ uint32_t log_level;
+
+ struct app_eal_params eal_params;
+ struct app_mempool_params mempool_params[APP_MAX_MEMPOOLS];
+ struct app_link_params link_params[APP_MAX_LINKS];
+ struct app_pktq_hwq_in_params hwq_in_params[APP_MAX_HWQ_IN];
+ struct app_pktq_hwq_out_params hwq_out_params[APP_MAX_HWQ_OUT];
+ struct app_pktq_swq_params swq_params[APP_MAX_PKTQ_SWQ];
+ struct app_pktq_tm_params tm_params[APP_MAX_PKTQ_TM];
+ struct app_pktq_source_params source_params[APP_MAX_PKTQ_SOURCE];
+ struct app_pktq_sink_params sink_params[APP_MAX_PKTQ_SINK];
+ struct app_msgq_params msgq_params[APP_MAX_MSGQ];
+ struct app_pipeline_params pipeline_params[APP_MAX_PIPELINES];
+
+ uint32_t n_mempools;
+ uint32_t n_links;
+ uint32_t n_pktq_hwq_in;
+ uint32_t n_pktq_hwq_out;
+ uint32_t n_pktq_swq;
+ uint32_t n_pktq_tm;
+ uint32_t n_pktq_source;
+ uint32_t n_pktq_sink;
+ uint32_t n_msgq;
+ uint32_t n_pipelines;
+
+ /* Init */
+ char *eal_argv[1 + APP_EAL_ARGC];
+ struct cpu_core_map *core_map;
+ uint64_t core_mask;
+ struct rte_mempool *mempool[APP_MAX_MEMPOOLS];
+ struct rte_ring *swq[APP_MAX_PKTQ_SWQ];
+ struct rte_sched_port *tm[APP_MAX_PKTQ_TM];
+ struct rte_ring *msgq[APP_MAX_MSGQ];
+ struct pipeline_type pipeline_type[APP_MAX_PIPELINE_TYPES];
+ struct app_pipeline_data pipeline_data[APP_MAX_PIPELINES];
+ struct app_thread_data thread_data[APP_MAX_THREADS];
+ cmdline_parse_ctx_t cmds[APP_MAX_CMDS + 1];
+
+ int eal_argc;
+ uint32_t n_pipeline_types;
+ uint32_t n_cmds;
+};
+
+#define APP_PARAM_VALID(obj) ((obj)->name != NULL)
+
+#define APP_PARAM_COUNT(obj_array, n_objs) \
+{ \
+ size_t i; \
+ \
+ n_objs = 0; \
+ for (i = 0; i < RTE_DIM(obj_array); i++) \
+ if (APP_PARAM_VALID(&((obj_array)[i]))) \
+ n_objs++; \
+}
+
+#define APP_PARAM_FIND(obj_array, key) \
+({ \
+ ssize_t obj_idx; \
+ const ssize_t obj_count = RTE_DIM(obj_array); \
+ \
+ for (obj_idx = 0; obj_idx < obj_count; obj_idx++) { \
+ if (!APP_PARAM_VALID(&((obj_array)[obj_idx]))) \
+ continue; \
+ \
+ if (strcmp(key, (obj_array)[obj_idx].name) == 0) \
+ break; \
+ } \
+ obj_idx < obj_count ? obj_idx : -ENOENT; \
+})
+
+#define APP_PARAM_FIND_BY_ID(obj_array, prefix, id, obj) \
+do { \
+ char name[APP_PARAM_NAME_SIZE]; \
+ ssize_t pos; \
+ \
+ sprintf(name, prefix "%" PRIu32, id); \
+ pos = APP_PARAM_FIND(obj_array, name); \
+ obj = (pos < 0) ? NULL : &((obj_array)[pos]); \
+} while (0)
+
+#define APP_PARAM_GET_ID(obj, prefix, id) \
+do \
+ sscanf(obj->name, prefix "%" SCNu32, &id); \
+while (0) \
+
+#define APP_PARAM_ADD(obj_array, obj_name) \
+({ \
+ ssize_t obj_idx; \
+ const ssize_t obj_count = RTE_DIM(obj_array); \
+ \
+ obj_idx = APP_PARAM_FIND(obj_array, obj_name); \
+ if (obj_idx < 0) { \
+ for (obj_idx = 0; obj_idx < obj_count; obj_idx++) { \
+ if (!APP_PARAM_VALID(&((obj_array)[obj_idx]))) \
+ break; \
+ } \
+ \
+ if (obj_idx < obj_count) { \
+ (obj_array)[obj_idx].name = strdup(obj_name); \
+ if ((obj_array)[obj_idx].name == NULL) \
+ obj_idx = -EINVAL; \
+ } else \
+ obj_idx = -ENOMEM; \
+ } \
+ obj_idx; \
+})
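+
+/*
+ * Illustrative usage sketch for the macros above (object names follow the
+ * "MEMPOOL<id>" convention used in the configuration files):
+ *
+ *	struct app_mempool_params *mp;
+ *	ssize_t idx;
+ *
+ *	idx = APP_PARAM_ADD(app->mempool_params, "MEMPOOL0");
+ *	APP_PARAM_FIND_BY_ID(app->mempool_params, "MEMPOOL", 0, mp);
+ *	APP_PARAM_COUNT(app->mempool_params, app->n_mempools);
+ */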
+
+#define APP_CHECK(exp, fmt, ...) \
+do { \
+ if (!(exp)) { \
+ fprintf(stderr, fmt "\n", ## __VA_ARGS__); \
+ abort(); \
+ } \
+} while (0)
+
+enum app_log_level {
+ APP_LOG_LEVEL_HIGH = 1,
+ APP_LOG_LEVEL_LOW,
+ APP_LOG_LEVELS
+};
+
+#define APP_LOG(app, level, fmt, ...) \
+do { \
+ if (app->log_level >= APP_LOG_LEVEL_ ## level) \
+ fprintf(stdout, "[APP] " fmt "\n", ## __VA_ARGS__); \
+} while (0)
+
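+/*
+ * The helpers below rely on the object naming convention used by the
+ * configuration: links are named "LINK<id>", hardware queues are named
+ * "RXQ<link>.<queue>" and "TXQ<link>.<queue>", and traffic managers are
+ * named "TM<link>" (e.g. "LINK0", "RXQ0.0", "TXQ0.1", "TM0").
+ */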
+static inline uint32_t
+app_link_get_n_rxq(struct app_params *app, struct app_link_params *link)
+{
+ uint32_t n_rxq = 0, link_id, i;
+ uint32_t n_pktq_hwq_in = RTE_MIN(app->n_pktq_hwq_in,
+ RTE_DIM(app->hwq_in_params));
+
+ APP_PARAM_GET_ID(link, "LINK", link_id);
+
+ for (i = 0; i < n_pktq_hwq_in; i++) {
+ struct app_pktq_hwq_in_params *p = &app->hwq_in_params[i];
+ uint32_t rxq_link_id, rxq_queue_id;
+
+ sscanf(p->name, "RXQ%" SCNu32 ".%" SCNu32,
+ &rxq_link_id, &rxq_queue_id);
+ if (rxq_link_id == link_id)
+ n_rxq++;
+ }
+
+ return n_rxq;
+}
+
+static inline uint32_t
+app_link_get_n_txq(struct app_params *app, struct app_link_params *link)
+{
+ uint32_t n_txq = 0, link_id, i;
+ uint32_t n_pktq_hwq_out = RTE_MIN(app->n_pktq_hwq_out,
+ RTE_DIM(app->hwq_out_params));
+
+ APP_PARAM_GET_ID(link, "LINK", link_id);
+
+ for (i = 0; i < n_pktq_hwq_out; i++) {
+ struct app_pktq_hwq_out_params *p = &app->hwq_out_params[i];
+ uint32_t txq_link_id, txq_queue_id;
+
+ sscanf(p->name, "TXQ%" SCNu32 ".%" SCNu32,
+ &txq_link_id, &txq_queue_id);
+ if (txq_link_id == link_id)
+ n_txq++;
+ }
+
+ return n_txq;
+}
+
+static inline uint32_t
+app_rxq_get_readers(struct app_params *app, struct app_pktq_hwq_in_params *rxq)
+{
+ uint32_t pos = rxq - app->hwq_in_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_readers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_in; j++) {
+ struct app_pktq_in_params *pktq = &p->pktq_in[j];
+
+ if ((pktq->type == APP_PKTQ_IN_HWQ) &&
+ (pktq->id == pos))
+ n_readers++;
+ }
+ }
+
+ return n_readers;
+}
+
+static inline uint32_t
+app_swq_get_readers(struct app_params *app, struct app_pktq_swq_params *swq)
+{
+ uint32_t pos = swq - app->swq_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_readers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_in; j++) {
+ struct app_pktq_in_params *pktq = &p->pktq_in[j];
+
+ if ((pktq->type == APP_PKTQ_IN_SWQ) &&
+ (pktq->id == pos))
+ n_readers++;
+ }
+ }
+
+ return n_readers;
+}
+
+static inline uint32_t
+app_tm_get_readers(struct app_params *app, struct app_pktq_tm_params *tm)
+{
+ uint32_t pos = tm - app->tm_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_readers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_in; j++) {
+ struct app_pktq_in_params *pktq = &p->pktq_in[j];
+
+ if ((pktq->type == APP_PKTQ_IN_TM) &&
+ (pktq->id == pos))
+ n_readers++;
+ }
+ }
+
+ return n_readers;
+}
+
+static inline uint32_t
+app_source_get_readers(struct app_params *app,
+struct app_pktq_source_params *source)
+{
+ uint32_t pos = source - app->source_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_readers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_in = RTE_MIN(p->n_pktq_in, RTE_DIM(p->pktq_in));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_in; j++) {
+ struct app_pktq_in_params *pktq = &p->pktq_in[j];
+
+ if ((pktq->type == APP_PKTQ_IN_SOURCE) &&
+ (pktq->id == pos))
+ n_readers++;
+ }
+ }
+
+ return n_readers;
+}
+
+static inline uint32_t
+app_msgq_get_readers(struct app_params *app, struct app_msgq_params *msgq)
+{
+ uint32_t pos = msgq - app->msgq_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_readers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_msgq_in = RTE_MIN(p->n_msgq_in, RTE_DIM(p->msgq_in));
+ uint32_t j;
+
+ for (j = 0; j < n_msgq_in; j++)
+ if (p->msgq_in[j] == pos)
+ n_readers++;
+ }
+
+ return n_readers;
+}
+
+static inline uint32_t
+app_txq_get_writers(struct app_params *app, struct app_pktq_hwq_out_params *txq)
+{
+ uint32_t pos = txq - app->hwq_out_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_writers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out,
+ RTE_DIM(p->pktq_out));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_out; j++) {
+ struct app_pktq_out_params *pktq = &p->pktq_out[j];
+
+ if ((pktq->type == APP_PKTQ_OUT_HWQ) &&
+ (pktq->id == pos))
+ n_writers++;
+ }
+ }
+
+ return n_writers;
+}
+
+static inline uint32_t
+app_swq_get_writers(struct app_params *app, struct app_pktq_swq_params *swq)
+{
+ uint32_t pos = swq - app->swq_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_writers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out,
+ RTE_DIM(p->pktq_out));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_out; j++) {
+ struct app_pktq_out_params *pktq = &p->pktq_out[j];
+
+ if ((pktq->type == APP_PKTQ_OUT_SWQ) &&
+ (pktq->id == pos))
+ n_writers++;
+ }
+ }
+
+ return n_writers;
+}
+
+static inline uint32_t
+app_tm_get_writers(struct app_params *app, struct app_pktq_tm_params *tm)
+{
+ uint32_t pos = tm - app->tm_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_writers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out,
+ RTE_DIM(p->pktq_out));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_out; j++) {
+ struct app_pktq_out_params *pktq = &p->pktq_out[j];
+
+ if ((pktq->type == APP_PKTQ_OUT_TM) &&
+ (pktq->id == pos))
+ n_writers++;
+ }
+ }
+
+ return n_writers;
+}
+
+static inline uint32_t
+app_sink_get_writers(struct app_params *app, struct app_pktq_sink_params *sink)
+{
+ uint32_t pos = sink - app->sink_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_writers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_pktq_out = RTE_MIN(p->n_pktq_out,
+ RTE_DIM(p->pktq_out));
+ uint32_t j;
+
+ for (j = 0; j < n_pktq_out; j++) {
+ struct app_pktq_out_params *pktq = &p->pktq_out[j];
+
+ if ((pktq->type == APP_PKTQ_OUT_SINK) &&
+ (pktq->id == pos))
+ n_writers++;
+ }
+ }
+
+ return n_writers;
+}
+
+static inline uint32_t
+app_msgq_get_writers(struct app_params *app, struct app_msgq_params *msgq)
+{
+ uint32_t pos = msgq - app->msgq_params;
+ uint32_t n_pipelines = RTE_MIN(app->n_pipelines,
+ RTE_DIM(app->pipeline_params));
+ uint32_t n_writers = 0, i;
+
+ for (i = 0; i < n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ uint32_t n_msgq_out = RTE_MIN(p->n_msgq_out,
+ RTE_DIM(p->msgq_out));
+ uint32_t j;
+
+ for (j = 0; j < n_msgq_out; j++)
+ if (p->msgq_out[j] == pos)
+ n_writers++;
+ }
+
+ return n_writers;
+}
+
+static inline struct app_link_params *
+app_get_link_for_rxq(struct app_params *app, struct app_pktq_hwq_in_params *p)
+{
+ char link_name[APP_PARAM_NAME_SIZE];
+ ssize_t link_param_idx;
+ uint32_t rxq_link_id, rxq_queue_id;
+
+ sscanf(p->name, "RXQ%" SCNu32 ".%" SCNu32,
+ &rxq_link_id, &rxq_queue_id);
+ sprintf(link_name, "LINK%" PRIu32, rxq_link_id);
+ link_param_idx = APP_PARAM_FIND(app->link_params, link_name);
+ APP_CHECK((link_param_idx >= 0),
+ "Cannot find %s for %s", link_name, p->name);
+
+ return &app->link_params[link_param_idx];
+}
+
+static inline struct app_link_params *
+app_get_link_for_txq(struct app_params *app, struct app_pktq_hwq_out_params *p)
+{
+ char link_name[APP_PARAM_NAME_SIZE];
+ ssize_t link_param_idx;
+ uint32_t txq_link_id, txq_queue_id;
+
+ sscanf(p->name, "TXQ%" SCNu32 ".%" SCNu32,
+ &txq_link_id, &txq_queue_id);
+ sprintf(link_name, "LINK%" PRIu32, txq_link_id);
+ link_param_idx = APP_PARAM_FIND(app->link_params, link_name);
+ APP_CHECK((link_param_idx >= 0),
+ "Cannot find %s for %s", link_name, p->name);
+
+ return &app->link_params[link_param_idx];
+}
+
+static inline struct app_link_params *
+app_get_link_for_tm(struct app_params *app, struct app_pktq_tm_params *p_tm)
+{
+ char link_name[APP_PARAM_NAME_SIZE];
+ uint32_t link_id;
+ ssize_t link_param_idx;
+
+ sscanf(p_tm->name, "TM%" PRIu32, &link_id);
+ sprintf(link_name, "LINK%" PRIu32, link_id);
+ link_param_idx = APP_PARAM_FIND(app->link_params, link_name);
+ APP_CHECK((link_param_idx >= 0),
+ "Cannot find %s for %s", link_name, p_tm->name);
+
+ return &app->link_params[link_param_idx];
+}
+
+int app_config_init(struct app_params *app);
+
+int app_config_args(struct app_params *app,
+ int argc, char **argv);
+
+int app_config_preproc(struct app_params *app);
+
+int app_config_parse(struct app_params *app,
+ const char *file_name);
+
+int app_config_parse_tm(struct app_params *app);
+
+void app_config_save(struct app_params *app,
+ const char *file_name);
+
+int app_config_check(struct app_params *app);
+
+int app_init(struct app_params *app);
+
+int app_thread(void *arg);
+
+int app_pipeline_type_register(struct app_params *app,
+ struct pipeline_type *ptype);
+
+struct pipeline_type *app_pipeline_type_find(struct app_params *app,
+ char *name);
+
+void app_link_up_internal(struct app_params *app,
+ struct app_link_params *cp);
+
+void app_link_down_internal(struct app_params *app,
+ struct app_link_params *cp);
+
+#endif
diff --git a/examples/ip_pipeline/config/edge_router_downstream.cfg b/examples/ip_pipeline/config/edge_router_downstream.cfg
new file mode 100644
index 00000000..85bbab8f
--- /dev/null
+++ b/examples/ip_pipeline/config/edge_router_downstream.cfg
@@ -0,0 +1,85 @@
+; BSD LICENSE
+;
+; Copyright(c) 2015 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+; An edge router typically sits between two networks such as the provider
+; core network and the provider access network. A typical packet processing
+; pipeline for the downstream traffic (i.e. traffic from core to access
+; network) contains the following functional blocks: Packet RX & Routing,
+; Traffic management and Packet TX. The input packets are assumed to be
+; IPv4, while the output packets are Q-in-Q IPv4.
+
+; A simple implementation for this functional pipeline is presented below.
+
+; Packet Rx & Traffic Management Packet Tx
+; Routing (Pass-Through) (Pass-Through)
+; _____________________ SWQ0 ______________________ SWQ4 _____________________
+; RXQ0.0 --->| |----->| |----->| |---> TXQ0.0
+; | | SWQ1 | | SWQ5 | |
+; RXQ1.0 --->| |----->| |----->| |---> TXQ1.0
+; | (P1) | SWQ2 | (P2) | SWQ6 | (P3) |
+; RXQ2.0 --->| |----->| |----->| |---> TXQ2.0
+; | | SWQ3 | | SWQ7 | |
+; RXQ3.0 --->| |----->| |----->| |---> TXQ3.0
+; |_____________________| |______________________| |_____________________|
+; | _|_ ^ _|_ ^ _|_ ^ _|_ ^
+; | |___|||___|||___|||___||
+; +--> SINK0 |___|||___|||___|||___||
+; (route miss) |__| |__| |__| |__|
+; TM0 TM1 TM2 TM3
+
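+; Illustrative run command (the -p/-f/-s flags are assumed from the typical
+; ip_pipeline command line; adjust the port mask and paths to your setup):
+;   ./build/ip_pipeline -p 0xf -f edge_router_downstream.cfg -s edge_router_downstream.sh
+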
+[PIPELINE0]
+type = MASTER
+core = 0
+
+[PIPELINE1]
+type = ROUTING
+core = 1
+pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0
+pktq_out = SWQ0 SWQ1 SWQ2 SWQ3 SINK0
+encap = ethernet_qinq
+qinq_sched = test
+ip_hdr_offset = 270; mbuf (128) + headroom (128) + ethernet header (14) = 270
+
+[PIPELINE2]
+type = PASS-THROUGH
+core = 2
+pktq_in = SWQ0 SWQ1 SWQ2 SWQ3 TM0 TM1 TM2 TM3
+pktq_out = TM0 TM1 TM2 TM3 SWQ4 SWQ5 SWQ6 SWQ7
+
+[PIPELINE3]
+type = PASS-THROUGH
+core = 3
+pktq_in = SWQ4 SWQ5 SWQ6 SWQ7
+pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0
+
+[MEMPOOL0]
+pool_size = 2M
diff --git a/examples/ip_pipeline/config/edge_router_downstream.sh b/examples/ip_pipeline/config/edge_router_downstream.sh
new file mode 100644
index 00000000..ce46beb5
--- /dev/null
+++ b/examples/ip_pipeline/config/edge_router_downstream.sh
@@ -0,0 +1,10 @@
+################################################################################
+# Routing: Ether QinQ, ARP off
+################################################################################
+p 1 route add default 4 #SINK0
+p 1 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0 qinq 256 257
+p 1 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1 qinq 258 259
+p 1 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2 qinq 260 261
+p 1 route add 0.192.0.0 10 port 3 ether a3:b3:c3:d3:e3:f3 qinq 262 263
+
+p 1 route ls
diff --git a/examples/ip_pipeline/config/edge_router_upstream.cfg b/examples/ip_pipeline/config/edge_router_upstream.cfg
new file mode 100644
index 00000000..a08c5cce
--- /dev/null
+++ b/examples/ip_pipeline/config/edge_router_upstream.cfg
@@ -0,0 +1,110 @@
+; BSD LICENSE
+;
+; Copyright(c) 2015 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; An edge router typically sits between two networks such as the provider
+; core network and the provider access network. A typical packet processing
+; pipeline for the upstream traffic (i.e. traffic from access to core
+; network) contains the following functional blocks: Packet RX & Firewall,
+; Flow classification, Metering, Routing and Packet TX. The input packets
+; are assumed to be Q-in-Q IPv4, while the output packets are MPLS IPv4
+; (with a variable number of labels per route).
+
+; A simple implementation for this functional pipeline is presented below.
+
+; Packet Rx & Pass-Through Flow-Classification Flow-Actions Routing
+;   Firewall
+; __________ SWQ0 __________ SWQ4 __________ SWQ8 __________ SWQ12 __________
+; RXQ0.0 --->| |------>| |------>| |------>| |------>| |------> TXQ0.0
+; | | SWQ1 | | SWQ5 | | SWQ9 | | SWQ13 | |
+; RXQ1.0 --->| |------>| |------>| |------>| |------>| |------> TXQ1.0
+; | (P1) | SWQ2 | (P2) | SWQ6 | (P3) | SWQ10 | (P4) | SWQ14 | (P5) |
+; RXQ2.0 --->| |------>| |------>| |------>| |------>| |------> TXQ2.0
+; | | SWQ3 | | SWQ7 | | SWQ11 | | SWQ15 | |
+; RXQ3.0 --->| |------>| |------>| |------>| |------>| |------> TXQ3.0
+; |__________| |__________| |__________| |__________| |__________|
+; | | |
+; +--> SINK0 (Default) +--> SINK1 (Default) +--> SINK2 (Route Miss)
+
+
+[PIPELINE0]
+type = MASTER
+core = 0
+
+[PIPELINE1]
+type = FIREWALL
+core = 1
+pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0
+pktq_out = SWQ0 SWQ1 SWQ2 SWQ3 SINK0
+n_rules = 4096
+pkt_type = qinq_ipv4
+
+[PIPELINE2]
+type = PASS-THROUGH
+core = 2
+pktq_in = SWQ0 SWQ1 SWQ2 SWQ3
+pktq_out = SWQ4 SWQ5 SWQ6 SWQ7
+dma_size = 8
+dma_dst_offset = 128; mbuf (128)
+dma_src_offset = 268; mbuf (128) + headroom (128) + 1st ethertype offset (12) = 268
+dma_src_mask = 00000FFF00000FFF; qinq
+dma_hash_offset = 136; dma_dst_offset + dma_size = 136
+
+[PIPELINE3]
+type = FLOW_CLASSIFICATION
+core = 2
+pktq_in = SWQ4 SWQ5 SWQ6 SWQ7
+pktq_out = SWQ8 SWQ9 SWQ10 SWQ11 SINK1
+n_flows = 65536
+key_size = 8; dma_size
+key_offset = 128; dma_dst_offset
+hash_offset = 136; dma_hash_offset
+flowid_offset = 192; mbuf (128) + 64
+
+[PIPELINE4]
+type = FLOW_ACTIONS
+core = 3
+pktq_in = SWQ8 SWQ9 SWQ10 SWQ11
+pktq_out = SWQ12 SWQ13 SWQ14 SWQ15
+n_flows = 65536
+n_meters_per_flow = 1
+flow_id_offset = 192; flowid_offset
+ip_hdr_offset = 278; mbuf (128) + headroom (128) + ethernet (14) + qinq (8) = 278
+color_offset = 196; flowid_offset + sizeof(flow_id)
+
+[PIPELINE5]
+type = ROUTING
+core = 4
+pktq_in = SWQ12 SWQ13 SWQ14 SWQ15
+pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 SINK2
+encap = ethernet_mpls
+mpls_color_mark = yes
+ip_hdr_offset = 278; mbuf (128) + headroom (128) + ethernet (14) + qinq (8) = 278
+color_offset = 196; flowid_offset + sizeof(flow_id)
diff --git a/examples/ip_pipeline/config/edge_router_upstream.sh b/examples/ip_pipeline/config/edge_router_upstream.sh
new file mode 100644
index 00000000..eeba600c
--- /dev/null
+++ b/examples/ip_pipeline/config/edge_router_upstream.sh
@@ -0,0 +1,38 @@
+################################################################################
+# Firewall rules: 4 rules (one per port)
+################################################################################
+p 1 firewall add ipv4 1 0.0.0.0 8 0.0.0.0 10 0 0 0 0 6 1 0
+p 1 firewall add ipv4 1 0.0.0.0 8 0.64.0.0 10 0 0 0 0 6 1 1
+p 1 firewall add ipv4 1 0.0.0.0 8 0.128.0.0 10 0 0 0 0 6 1 2
+p 1 firewall add ipv4 1 0.0.0.0 8 0.192.0.0 10 0 0 0 0 6 1 3
+p 1 firewall add default 4 #SINK0
+
+
+################################################################################
+# Flow classification
+################################################################################
+p 3 flow add default 4 #SINK1
+p 3 flow add qinq all 65536 4
+
+################################################################################
+# Flow Actions - Metering
+################################################################################
+p 4 flows 65536 meter 0 trtcm 1250000000 1250000000 100000000 100000000
+p 4 flows 65536 ports 4
+
+################################################################################
+# Routing: Ether MPLS, ARP off
+################################################################################
+p 5 route add default 4 #SINK2
+p 5 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0 mpls 0:1
+p 5 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1 mpls 10:11
+p 5 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2 mpls 20:21
+p 5 route add 0.192.0.0 10 port 3 ether a3:b3:c3:d3:e3:f3 mpls 30:31
+
+################################################################################
+# List all configurations
+################################################################################
+p 1 firewall ls
+#p 3 flow ls
+#p 4 flow actions ls
+p 5 route ls
diff --git a/examples/ip_pipeline/config/ip_pipeline.cfg b/examples/ip_pipeline/config/ip_pipeline.cfg
new file mode 100644
index 00000000..095ed25e
--- /dev/null
+++ b/examples/ip_pipeline/config/ip_pipeline.cfg
@@ -0,0 +1,9 @@
+[PIPELINE0]
+type = MASTER
+core = 0
+
+[PIPELINE1]
+type = PASS-THROUGH
+core = 1
+pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0
+pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0
diff --git a/examples/ip_pipeline/config/ip_pipeline.sh b/examples/ip_pipeline/config/ip_pipeline.sh
new file mode 100644
index 00000000..4fca2597
--- /dev/null
+++ b/examples/ip_pipeline/config/ip_pipeline.sh
@@ -0,0 +1,5 @@
+#
+#run config/ip_pipeline.sh
+#
+
+p 1 ping
diff --git a/examples/ip_pipeline/config/l2fwd.cfg b/examples/ip_pipeline/config/l2fwd.cfg
new file mode 100644
index 00000000..c743a143
--- /dev/null
+++ b/examples/ip_pipeline/config/l2fwd.cfg
@@ -0,0 +1,55 @@
+; BSD LICENSE
+;
+; Copyright(c) 2015 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+;
+; The pass-through pipeline below connects the input ports to the output ports
+; as follows: RXQ0.0 -> TXQ1.0, RXQ1.0 -> TXQ0.0, RXQ2.0 -> TXQ3.0 and
+; RXQ3.0 -> TXQ2.0.
+; ________________
+; RXQ0.0 --->|................|---> TXQ1.0
+; | |
+; RXQ1.0 --->|................|---> TXQ0.0
+; | Pass-through |
+; RXQ2.0 --->|................|---> TXQ3.0
+; | |
+; RXQ3.0 --->|................|---> TXQ2.0
+; |________________|
+;
+
+[PIPELINE0]
+type = MASTER
+core = 0
+
+[PIPELINE1]
+type = PASS-THROUGH
+core = 1
+pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0
+pktq_out = TXQ1.0 TXQ0.0 TXQ3.0 TXQ2.0
diff --git a/examples/ip_pipeline/config/l3fwd.cfg b/examples/ip_pipeline/config/l3fwd.cfg
new file mode 100644
index 00000000..5449dc32
--- /dev/null
+++ b/examples/ip_pipeline/config/l3fwd.cfg
@@ -0,0 +1,63 @@
+; BSD LICENSE
+;
+; Copyright(c) 2015 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; _______________
+; RXQ0.0 --->| |---> TXQ0.0
+; | |
+; RXQ1.0 --->| |---> TXQ1.0
+; | Routing |
+; RXQ2.0 --->| |---> TXQ2.0
+; | |
+; RXQ3.0 --->| |---> TXQ3.0
+; |_______________|
+; |
+; +-----------> SINK0 (route miss)
+;
+; Input packet: Ethernet/IPv4
+;
+; Packet buffer layout:
+; # Field Name Offset (Bytes) Size (Bytes)
+; 0 Mbuf 0 128
+; 1 Headroom 128 128
+; 2 Ethernet header 256 14
+; 3 IPv4 header 270 20
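+;
+; Hence the ip_hdr_offset value used below: 128 (mbuf) + 128 (headroom) +
+; 14 (Ethernet header) = 270.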
+
+[PIPELINE0]
+type = MASTER
+core = 0
+
+[PIPELINE1]
+type = ROUTING
+core = 1
+pktq_in = RXQ0.0 RXQ1.0 RXQ2.0 RXQ3.0
+pktq_out = TXQ0.0 TXQ1.0 TXQ2.0 TXQ3.0 SINK0
+encap = ethernet; encap = ethernet / ethernet_qinq / ethernet_mpls
+ip_hdr_offset = 270
diff --git a/examples/ip_pipeline/config/l3fwd.sh b/examples/ip_pipeline/config/l3fwd.sh
new file mode 100644
index 00000000..27740103
--- /dev/null
+++ b/examples/ip_pipeline/config/l3fwd.sh
@@ -0,0 +1,9 @@
+################################################################################
+# Routing: encap = ethernet, arp = off
+################################################################################
+p 1 route add default 4 #SINK0
+p 1 route add 0.0.0.0 10 port 0 ether a0:b0:c0:d0:e0:f0
+p 1 route add 0.64.0.0 10 port 1 ether a1:b1:c1:d1:e1:f1
+p 1 route add 0.128.0.0 10 port 2 ether a2:b2:c2:d2:e2:f2
+p 1 route add 0.192.0.0 10 port 3 ether a3:b3:c3:d3:e3:f3
+p 1 route ls
diff --git a/examples/ip_pipeline/config/tm_profile.cfg b/examples/ip_pipeline/config/tm_profile.cfg
new file mode 100644
index 00000000..2dfb215e
--- /dev/null
+++ b/examples/ip_pipeline/config/tm_profile.cfg
@@ -0,0 +1,105 @@
+; BSD LICENSE
+;
+; Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; This file enables the following hierarchical scheduler configuration for each
+; 10GbE output port:
+; * Single subport (subport 0):
+; - Subport rate set to 100% of port rate
+; - Each of the 4 traffic classes has rate set to 100% of port rate
+; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+; - Pipe rate set to 1/4K of port rate
+; - Each of the 4 traffic classes has rate set to 100% of pipe rate
+; - Within each traffic class, the byte-level WRR weights for the 4 queues
+; are set to 1:1:1:1
+;
+; For more details, please refer to chapter "Quality of Service (QoS) Framework"
+; of the Data Plane Development Kit (DPDK) Programmer's Guide.
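+;
+; For a 10GbE port, 100% of port rate is 10 Gbps / 8 = 1250000000 bytes/second
+; (the subport tb/tc rates below), and 1/4K of port rate is
+; 1250000000 / 4096 ~= 305175 bytes/second (the pipe profile tb/tc rates below).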
+
+; Port configuration
+[port]
+frame overhead = 24 ; frame overhead = Preamble (7) + SFD (1) + FCS (4) + IFG (12)
+mtu = 1522; mtu = Q-in-Q MTU (FCS not included)
+number of subports per port = 1
+number of pipes per subport = 4096
+queue sizes = 64 64 64 64
+
+; Subport configuration
+[subport 0]
+tb rate = 1250000000 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 1250000000 ; Bytes per second
+tc 1 rate = 1250000000 ; Bytes per second
+tc 2 rate = 1250000000 ; Bytes per second
+tc 3 rate = 1250000000 ; Bytes per second
+tc period = 10 ; Milliseconds
+
+pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0
+
+; Pipe configuration
+[pipe profile 0]
+tb rate = 305175 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 305175 ; Bytes per second
+tc 1 rate = 305175 ; Bytes per second
+tc 2 rate = 305175 ; Bytes per second
+tc 3 rate = 305175 ; Bytes per second
+tc period = 40 ; Milliseconds
+
+tc 3 oversubscription weight = 1
+
+tc 0 wrr weights = 1 1 1 1
+tc 1 wrr weights = 1 1 1 1
+tc 2 wrr weights = 1 1 1 1
+tc 3 wrr weights = 1 1 1 1
+
+; RED params per traffic class and color (Green / Yellow / Red)
+[red]
+tc 0 wred min = 48 40 32
+tc 0 wred max = 64 64 64
+tc 0 wred inv prob = 10 10 10
+tc 0 wred weight = 9 9 9
+
+tc 1 wred min = 48 40 32
+tc 1 wred max = 64 64 64
+tc 1 wred inv prob = 10 10 10
+tc 1 wred weight = 9 9 9
+
+tc 2 wred min = 48 40 32
+tc 2 wred max = 64 64 64
+tc 2 wred inv prob = 10 10 10
+tc 2 wred weight = 9 9 9
+
+tc 3 wred min = 48 40 32
+tc 3 wred max = 64 64 64
+tc 3 wred inv prob = 10 10 10
+tc 3 wred weight = 9 9 9
diff --git a/examples/ip_pipeline/config_check.c b/examples/ip_pipeline/config_check.c
new file mode 100644
index 00000000..fd9ff495
--- /dev/null
+++ b/examples/ip_pipeline/config_check.c
@@ -0,0 +1,444 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+
+#include <rte_ip.h>
+
+#include "app.h"
+
+static void
+check_mempools(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_mempools; i++) {
+ struct app_mempool_params *p = &app->mempool_params[i];
+
+ APP_CHECK((p->pool_size > 0),
+ "Mempool %s size is 0\n", p->name);
+
+ APP_CHECK((p->cache_size > 0),
+ "Mempool %s cache size is 0\n", p->name);
+
+ APP_CHECK(rte_is_power_of_2(p->cache_size),
+ "Mempool %s cache size not a power of 2\n", p->name);
+ }
+}
+
+static void
+check_links(struct app_params *app)
+{
+ uint32_t i;
+
+ /* Check that number of links matches the port mask */
+ if (app->port_mask) {
+ uint32_t n_links_port_mask =
+ __builtin_popcountll(app->port_mask);
+
+ APP_CHECK((app->n_links == n_links_port_mask),
+			"Number of links does not match the PORT_MASK\n");
+ }
+
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *link = &app->link_params[i];
+ uint32_t rxq_max, n_rxq, n_txq, link_id, i;
+
+ APP_PARAM_GET_ID(link, "LINK", link_id);
+
+ /* Check that link RXQs are contiguous */
+ rxq_max = 0;
+ if (link->arp_q > rxq_max)
+ rxq_max = link->arp_q;
+ if (link->tcp_syn_q > rxq_max)
+ rxq_max = link->tcp_syn_q;
+ if (link->ip_local_q > rxq_max)
+ rxq_max = link->ip_local_q;
+ if (link->tcp_local_q > rxq_max)
+ rxq_max = link->tcp_local_q;
+ if (link->udp_local_q > rxq_max)
+ rxq_max = link->udp_local_q;
+ if (link->sctp_local_q > rxq_max)
+ rxq_max = link->sctp_local_q;
+
+ for (i = 1; i <= rxq_max; i++)
+ APP_CHECK(((link->arp_q == i) ||
+ (link->tcp_syn_q == i) ||
+ (link->ip_local_q == i) ||
+ (link->tcp_local_q == i) ||
+ (link->udp_local_q == i) ||
+ (link->sctp_local_q == i)),
+ "%s RXQs are not contiguous (A)\n", link->name);
+
+ n_rxq = app_link_get_n_rxq(app, link);
+
+ APP_CHECK((n_rxq), "%s does not have any RXQ\n", link->name);
+
+ APP_CHECK((n_rxq == rxq_max + 1),
+ "%s RXQs are not contiguous (B)\n", link->name);
+
+ for (i = 0; i < n_rxq; i++) {
+ char name[APP_PARAM_NAME_SIZE];
+ int pos;
+
+ sprintf(name, "RXQ%" PRIu32 ".%" PRIu32,
+ link_id, i);
+ pos = APP_PARAM_FIND(app->hwq_in_params, name);
+ APP_CHECK((pos >= 0),
+ "%s RXQs are not contiguous (C)\n", link->name);
+ }
+
+		/* Check that link TXQs exist and are contiguous */
+ n_txq = app_link_get_n_txq(app, link);
+
+ APP_CHECK((n_txq), "%s does not have any TXQ\n", link->name);
+
+ for (i = 0; i < n_txq; i++) {
+ char name[APP_PARAM_NAME_SIZE];
+ int pos;
+
+ sprintf(name, "TXQ%" PRIu32 ".%" PRIu32,
+ link_id, i);
+ pos = APP_PARAM_FIND(app->hwq_out_params, name);
+ APP_CHECK((pos >= 0),
+ "%s TXQs are not contiguous\n", link->name);
+ }
+ }
+}
+
+static void
+check_rxqs(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_hwq_in; i++) {
+ struct app_pktq_hwq_in_params *p = &app->hwq_in_params[i];
+ uint32_t n_readers = app_rxq_get_readers(app, p);
+
+ APP_CHECK((p->size > 0),
+ "%s size is 0\n", p->name);
+
+ APP_CHECK((rte_is_power_of_2(p->size)),
+ "%s size is not a power of 2\n", p->name);
+
+ APP_CHECK((p->burst > 0),
+ "%s burst size is 0\n", p->name);
+
+ APP_CHECK((p->burst <= p->size),
+ "%s burst size is bigger than its size\n", p->name);
+
+ APP_CHECK((n_readers != 0),
+ "%s has no reader\n", p->name);
+
+ APP_CHECK((n_readers == 1),
+ "%s has more than one reader\n", p->name);
+ }
+}
+
+static void
+check_txqs(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_hwq_out; i++) {
+ struct app_pktq_hwq_out_params *p = &app->hwq_out_params[i];
+ uint32_t n_writers = app_txq_get_writers(app, p);
+
+ APP_CHECK((p->size > 0),
+ "%s size is 0\n", p->name);
+
+ APP_CHECK((rte_is_power_of_2(p->size)),
+ "%s size is not a power of 2\n", p->name);
+
+ APP_CHECK((p->burst > 0),
+ "%s burst size is 0\n", p->name);
+
+ APP_CHECK((p->burst <= p->size),
+ "%s burst size is bigger than its size\n", p->name);
+
+ APP_CHECK((n_writers != 0),
+ "%s has no writer\n", p->name);
+
+ APP_CHECK((n_writers == 1),
+ "%s has more than one writer\n", p->name);
+ }
+}
+
+static void
+check_swqs(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_swq; i++) {
+ struct app_pktq_swq_params *p = &app->swq_params[i];
+ uint32_t n_readers = app_swq_get_readers(app, p);
+ uint32_t n_writers = app_swq_get_writers(app, p);
+ uint32_t n_flags;
+
+ APP_CHECK((p->size > 0),
+ "%s size is 0\n", p->name);
+
+ APP_CHECK((rte_is_power_of_2(p->size)),
+ "%s size is not a power of 2\n", p->name);
+
+ APP_CHECK((p->burst_read > 0),
+ "%s read burst size is 0\n", p->name);
+
+ APP_CHECK((p->burst_read <= p->size),
+ "%s read burst size is bigger than its size\n",
+ p->name);
+
+ APP_CHECK((p->burst_write > 0),
+ "%s write burst size is 0\n", p->name);
+
+ APP_CHECK((p->burst_write <= p->size),
+ "%s write burst size is bigger than its size\n",
+ p->name);
+
+ APP_CHECK((n_readers != 0),
+ "%s has no reader\n", p->name);
+
+ if (n_readers > 1)
+ APP_LOG(app, LOW, "%s has more than one reader", p->name);
+
+ APP_CHECK((n_writers != 0),
+ "%s has no writer\n", p->name);
+
+ if (n_writers > 1)
+ APP_LOG(app, LOW, "%s has more than one writer", p->name);
+
+ n_flags = p->ipv4_frag + p->ipv6_frag + p->ipv4_ras + p->ipv6_ras;
+
+ APP_CHECK((n_flags < 2),
+ "%s has more than one fragmentation or reassembly mode enabled\n",
+ p->name);
+
+ APP_CHECK((!((n_readers > 1) && (n_flags == 1))),
+ "%s has more than one reader when fragmentation or reassembly"
+ " mode enabled\n",
+ p->name);
+
+ APP_CHECK((!((n_writers > 1) && (n_flags == 1))),
+ "%s has more than one writer when fragmentation or reassembly"
+ " mode enabled\n",
+ p->name);
+
+ n_flags = p->ipv4_ras + p->ipv6_ras;
+
+ APP_CHECK((!((p->dropless == 1) && (n_flags == 1))),
+			"%s cannot be dropless when reassembly mode is enabled\n", p->name);
+
+ n_flags = p->ipv4_frag + p->ipv6_frag;
+
+ if (n_flags == 1) {
+ uint16_t ip_hdr_size = (p->ipv4_frag) ? sizeof(struct ipv4_hdr) :
+ sizeof(struct ipv6_hdr);
+
+ APP_CHECK((p->mtu > ip_hdr_size),
+ "%s mtu size is smaller than ip header\n", p->name);
+
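+			/* IP fragments carry payload in multiples of 8 bytes, so
+			 * the MTU minus the IP header must be 8-byte aligned. */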
+ APP_CHECK((!((p->mtu - ip_hdr_size) % 8)),
+ "%s mtu size is incorrect\n", p->name);
+ }
+ }
+}
+
+static void
+check_tms(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_tm; i++) {
+ struct app_pktq_tm_params *p = &app->tm_params[i];
+ uint32_t n_readers = app_tm_get_readers(app, p);
+ uint32_t n_writers = app_tm_get_writers(app, p);
+
+ APP_CHECK((n_readers != 0),
+ "%s has no reader\n", p->name);
+
+ APP_CHECK((n_readers == 1),
+ "%s has more than one reader\n", p->name);
+
+ APP_CHECK((n_writers != 0),
+ "%s has no writer\n", p->name);
+
+ APP_CHECK((n_writers == 1),
+ "%s has more than one writer\n", p->name);
+ }
+}
+
+static void
+check_sources(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_source; i++) {
+ struct app_pktq_source_params *p = &app->source_params[i];
+ uint32_t n_readers = app_source_get_readers(app, p);
+
+ APP_CHECK((n_readers != 0),
+ "%s has no reader\n", p->name);
+
+ APP_CHECK((n_readers == 1),
+ "%s has more than one reader\n", p->name);
+ }
+}
+
+static void
+check_sinks(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_sink; i++) {
+ struct app_pktq_sink_params *p = &app->sink_params[i];
+ uint32_t n_writers = app_sink_get_writers(app, p);
+
+ APP_CHECK((n_writers != 0),
+ "%s has no writer\n", p->name);
+
+ APP_CHECK((n_writers == 1),
+ "%s has more than one writer\n", p->name);
+ }
+}
+
+static void
+check_msgqs(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_msgq; i++) {
+ struct app_msgq_params *p = &app->msgq_params[i];
+ uint32_t n_readers = app_msgq_get_readers(app, p);
+ uint32_t n_writers = app_msgq_get_writers(app, p);
+ uint32_t msgq_req_pipeline, msgq_rsp_pipeline;
+ uint32_t msgq_req_core, msgq_rsp_core;
+
+ APP_CHECK((p->size > 0),
+ "%s size is 0\n", p->name);
+
+ APP_CHECK((rte_is_power_of_2(p->size)),
+ "%s size is not a power of 2\n", p->name);
+
+ msgq_req_pipeline = (strncmp(p->name, "MSGQ-REQ-PIPELINE",
+ strlen("MSGQ-REQ-PIPELINE")) == 0);
+
+ msgq_rsp_pipeline = (strncmp(p->name, "MSGQ-RSP-PIPELINE",
+ strlen("MSGQ-RSP-PIPELINE")) == 0);
+
+ msgq_req_core = (strncmp(p->name, "MSGQ-REQ-CORE",
+ strlen("MSGQ-REQ-CORE")) == 0);
+
+ msgq_rsp_core = (strncmp(p->name, "MSGQ-RSP-CORE",
+ strlen("MSGQ-RSP-CORE")) == 0);
+
+ if ((msgq_req_pipeline == 0) &&
+ (msgq_rsp_pipeline == 0) &&
+ (msgq_req_core == 0) &&
+ (msgq_rsp_core == 0)) {
+ APP_CHECK((n_readers != 0),
+ "%s has no reader\n", p->name);
+
+ APP_CHECK((n_readers == 1),
+ "%s has more than one reader\n", p->name);
+
+ APP_CHECK((n_writers != 0),
+ "%s has no writer\n", p->name);
+
+ APP_CHECK((n_writers == 1),
+ "%s has more than one writer\n", p->name);
+ }
+
+ if (msgq_req_pipeline) {
+ struct app_pipeline_params *pipeline;
+ uint32_t pipeline_id;
+
+ APP_PARAM_GET_ID(p, "MSGQ-REQ-PIPELINE", pipeline_id);
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params,
+ "PIPELINE",
+ pipeline_id,
+ pipeline);
+
+ APP_CHECK((pipeline != NULL),
+ "%s is not associated with a valid pipeline\n",
+ p->name);
+ }
+
+ if (msgq_rsp_pipeline) {
+ struct app_pipeline_params *pipeline;
+ uint32_t pipeline_id;
+
+ APP_PARAM_GET_ID(p, "MSGQ-RSP-PIPELINE", pipeline_id);
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params,
+ "PIPELINE",
+ pipeline_id,
+ pipeline);
+
+ APP_CHECK((pipeline != NULL),
+ "%s is not associated with a valid pipeline\n",
+ p->name);
+ }
+ }
+}
+
+static void
+check_pipelines(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+
+ APP_CHECK((p->n_msgq_in == p->n_msgq_out),
+ "%s number of input MSGQs does not match "
+ "the number of output MSGQs\n", p->name);
+ }
+}
+
+int
+app_config_check(struct app_params *app)
+{
+ check_mempools(app);
+ check_links(app);
+ check_rxqs(app);
+ check_txqs(app);
+ check_swqs(app);
+ check_tms(app);
+ check_sources(app);
+ check_sinks(app);
+ check_msgqs(app);
+ check_pipelines(app);
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/config_parse.c b/examples/ip_pipeline/config_parse.c
new file mode 100644
index 00000000..e5efd03e
--- /dev/null
+++ b/examples/ip_pipeline/config_parse.c
@@ -0,0 +1,3383 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <libgen.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <rte_errno.h>
+#include <rte_cfgfile.h>
+#include <rte_string_fns.h>
+
+#include "app.h"
+#include "parser.h"
+
+/**
+ * Default config values
+ */
+
+static struct app_params app_params_default = {
+ .config_file = "./config/ip_pipeline.cfg",
+ .log_level = APP_LOG_LEVEL_HIGH,
+ .port_mask = 0,
+
+ .eal_params = {
+ .channels = 4,
+ },
+};
+
+static const struct app_mempool_params mempool_params_default = {
+ .parsed = 0,
+ .buffer_size = 2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM,
+ .pool_size = 32 * 1024,
+ .cache_size = 256,
+ .cpu_socket_id = 0,
+};
+
+static const struct app_link_params link_params_default = {
+ .parsed = 0,
+ .pmd_id = 0,
+ .arp_q = 0,
+ .tcp_syn_q = 0,
+ .ip_local_q = 0,
+ .tcp_local_q = 0,
+ .udp_local_q = 0,
+ .sctp_local_q = 0,
+ .state = 0,
+ .ip = 0,
+ .depth = 0,
+ .mac_addr = 0,
+ .pci_bdf = {0},
+
+ .conf = {
+ .link_speeds = 0,
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_NONE,
+
+ .header_split = 0, /* Header split */
+ .hw_ip_checksum = 0, /* IP checksum offload */
+ .hw_vlan_filter = 0, /* VLAN filtering */
+ .hw_vlan_strip = 0, /* VLAN strip */
+ .hw_vlan_extend = 0, /* Extended VLAN */
+ .jumbo_frame = 0, /* Jumbo frame support */
+ .hw_strip_crc = 0, /* CRC strip by HW */
+ .enable_scatter = 0, /* Scattered packets RX handler */
+
+ .max_rx_pkt_len = 9000, /* Jumbo frame max packet len */
+ .split_hdr_size = 0, /* Header split buffer size */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .lpbk_mode = 0,
+ },
+
+ .promisc = 1,
+};
+
+static const struct app_pktq_hwq_in_params default_hwq_in_params = {
+ .parsed = 0,
+ .mempool_id = 0,
+ .size = 128,
+ .burst = 32,
+
+ .conf = {
+ .rx_thresh = {
+ .pthresh = 8,
+ .hthresh = 8,
+ .wthresh = 4,
+ },
+ .rx_free_thresh = 64,
+ .rx_drop_en = 0,
+ .rx_deferred_start = 0,
+ }
+};
+
+static const struct app_pktq_hwq_out_params default_hwq_out_params = {
+ .parsed = 0,
+ .size = 512,
+ .burst = 32,
+ .dropless = 0,
+ .n_retries = 0,
+
+ .conf = {
+ .tx_thresh = {
+ .pthresh = 36,
+ .hthresh = 0,
+ .wthresh = 0,
+ },
+ .tx_rs_thresh = 0,
+ .tx_free_thresh = 0,
+ .txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS |
+ ETH_TXQ_FLAGS_NOOFFLOADS,
+ .tx_deferred_start = 0,
+ }
+};
+
+static const struct app_pktq_swq_params default_swq_params = {
+ .parsed = 0,
+ .size = 256,
+ .burst_read = 32,
+ .burst_write = 32,
+ .dropless = 0,
+ .n_retries = 0,
+ .cpu_socket_id = 0,
+ .ipv4_frag = 0,
+ .ipv6_frag = 0,
+ .ipv4_ras = 0,
+ .ipv6_ras = 0,
+ .mtu = 0,
+ .metadata_size = 0,
+ .mempool_direct_id = 0,
+ .mempool_indirect_id = 0,
+};
+
+struct app_pktq_tm_params default_tm_params = {
+ .parsed = 0,
+ .file_name = "./config/tm_profile.cfg",
+ .burst_read = 64,
+ .burst_write = 32,
+};
+
+struct app_pktq_source_params default_source_params = {
+ .parsed = 0,
+ .mempool_id = 0,
+ .burst = 32,
+ .file_name = NULL,
+ .n_bytes_per_pkt = 0,
+};
+
+struct app_pktq_sink_params default_sink_params = {
+ .parsed = 0,
+ .file_name = NULL,
+ .n_pkts_to_dump = 0,
+};
+
+struct app_msgq_params default_msgq_params = {
+ .parsed = 0,
+ .size = 64,
+ .cpu_socket_id = 0,
+};
+
+struct app_pipeline_params default_pipeline_params = {
+ .parsed = 0,
+ .socket_id = 0,
+ .core_id = 0,
+ .hyper_th_id = 0,
+ .n_pktq_in = 0,
+ .n_pktq_out = 0,
+ .n_msgq_in = 0,
+ .n_msgq_out = 0,
+ .timer_period = 1,
+ .n_args = 0,
+};
+
+static const char app_usage[] =
+ "Usage: %s [-f CONFIG_FILE] [-s SCRIPT_FILE] [-p PORT_MASK] "
+ "[-l LOG_LEVEL] [--preproc PREPROCESSOR] [--preproc-args ARGS]\n"
+ "\n"
+ "Arguments:\n"
+ "\t-f CONFIG_FILE: Default config file is %s\n"
+ "\t-p PORT_MASK: Mask of NIC port IDs in hex format (generated from "
+ "config file when not provided)\n"
+ "\t-s SCRIPT_FILE: No CLI script file is run when not specified\n"
+ "\t-l LOG_LEVEL: 0 = NONE, 1 = HIGH PRIO (default), 2 = LOW PRIO\n"
+ "\t--preproc PREPROCESSOR: Configuration file pre-processor\n"
+ "\t--preproc-args ARGS: Arguments to be passed to pre-processor\n"
+ "\n";
+
+static void
+app_print_usage(char *prgname)
+{
+ rte_exit(0, app_usage, prgname, app_params_default.config_file);
+}
+
+#define skip_white_spaces(pos) \
+({ \
+ __typeof__(pos) _p = (pos); \
+ for ( ; isspace(*_p); _p++); \
+ _p; \
+})
+
+#define PARSER_PARAM_ADD_CHECK(result, params_array, section_name) \
+do { \
+ APP_CHECK((result != -EINVAL), \
+ "Parse error: no free memory"); \
+ APP_CHECK((result != -ENOMEM), \
+ "Parse error: too many \"%s\" sections", section_name); \
+ APP_CHECK(((result >= 0) && (params_array)[result].parsed == 0),\
+ "Parse error: duplicate \"%s\" section", section_name); \
+ APP_CHECK((result >= 0), \
+ "Parse error in section \"%s\"", section_name); \
+} while (0)
+
+int
+parser_read_arg_bool(const char *p)
+{
+ p = skip_white_spaces(p);
+ int result = -EINVAL;
+
+ if (((p[0] == 'y') && (p[1] == 'e') && (p[2] == 's')) ||
+ ((p[0] == 'Y') && (p[1] == 'E') && (p[2] == 'S'))) {
+ p += 3;
+ result = 1;
+ }
+
+ if (((p[0] == 'o') && (p[1] == 'n')) ||
+ ((p[0] == 'O') && (p[1] == 'N'))) {
+ p += 2;
+ result = 1;
+ }
+
+ if (((p[0] == 'n') && (p[1] == 'o')) ||
+ ((p[0] == 'N') && (p[1] == 'O'))) {
+ p += 2;
+ result = 0;
+ }
+
+ if (((p[0] == 'o') && (p[1] == 'f') && (p[2] == 'f')) ||
+ ((p[0] == 'O') && (p[1] == 'F') && (p[2] == 'F'))) {
+ p += 3;
+ result = 0;
+ }
+
+ p = skip_white_spaces(p);
+
+ if (p[0] != '\0')
+ return -EINVAL;
+
+ return result;
+}
+
+#define PARSE_ERROR(exp, section, entry) \
+APP_CHECK(exp, "Parse error in section \"%s\": entry \"%s\"\n", section, entry)
+
+#define PARSE_ERROR_MESSAGE(exp, section, entry, message) \
+APP_CHECK(exp, "Parse error in section \"%s\", entry \"%s\": %s\n", \
+ section, entry, message)
+
+
+#define PARSE_ERROR_MALLOC(exp) \
+APP_CHECK(exp, "Parse error: no free memory\n")
+
+#define PARSE_ERROR_SECTION(exp, section) \
+APP_CHECK(exp, "Parse error in section \"%s\"", section)
+
+#define PARSE_ERROR_SECTION_NO_ENTRIES(exp, section) \
+APP_CHECK(exp, "Parse error in section \"%s\": no entries\n", section)
+
+#define PARSE_WARNING_IGNORED(exp, section, entry) \
+do \
+if (!(exp)) \
+ fprintf(stderr, "Parse warning in section \"%s\": " \
+ "entry \"%s\" is ignored\n", section, entry); \
+while (0)
+
+#define PARSE_ERROR_INVALID(exp, section, entry) \
+APP_CHECK(exp, "Parse error in section \"%s\": unrecognized entry \"%s\"\n",\
+ section, entry)
+
+#define PARSE_ERROR_DUPLICATE(exp, section, entry) \
+APP_CHECK(exp, "Parse error in section \"%s\": duplicate entry \"%s\"\n",\
+ section, entry)
+
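+/*
+ * Read an unsigned 64-bit value, accepting an optional k/K, M, G or T suffix
+ * that multiplies the value by 1024, 1024^2, 1024^3 or 1024^4 respectively
+ * (e.g. "16K" yields 16384).
+ */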
+int
+parser_read_uint64(uint64_t *value, const char *p)
+{
+ char *next;
+ uint64_t val;
+
+ p = skip_white_spaces(p);
+ if (!isdigit(*p))
+ return -EINVAL;
+
+ val = strtoul(p, &next, 10);
+ if (p == next)
+ return -EINVAL;
+
+ p = next;
+ switch (*p) {
+ case 'T':
+ val *= 1024ULL;
+ /* fall through */
+ case 'G':
+ val *= 1024ULL;
+ /* fall through */
+ case 'M':
+ val *= 1024ULL;
+ /* fall through */
+ case 'k':
+ case 'K':
+ val *= 1024ULL;
+ p++;
+ break;
+ }
+
+ p = skip_white_spaces(p);
+ if (*p != '\0')
+ return -EINVAL;
+
+ *value = val;
+ return 0;
+}
+
+int
+parser_read_uint32(uint32_t *value, const char *p)
+{
+ uint64_t val = 0;
+ int ret = parser_read_uint64(&val, p);
+
+ if (ret < 0)
+ return ret;
+
+ if (val > UINT32_MAX)
+ return -ERANGE;
+
+ *value = val;
+ return 0;
+}
+
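+/*
+ * Parse the pipeline "core" entry: either a plain core id (e.g. "2") or a
+ * socket/core/hyper-thread spec of the form [sX][cY][h] (e.g. "s0c1", "s0c1h").
+ */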
+int
+parse_pipeline_core(uint32_t *socket,
+ uint32_t *core,
+ uint32_t *ht,
+ const char *entry)
+{
+ size_t num_len;
+ char num[8];
+
+ uint32_t s = 0, c = 0, h = 0, val;
+ uint8_t s_parsed = 0, c_parsed = 0, h_parsed = 0;
+ const char *next = skip_white_spaces(entry);
+ char type;
+
+ /* Expect <CORE> or [sX][cY][h]. At least one parameter is required. */
+ while (*next != '\0') {
+		/* If everything is parsed, nothing should be left */
+ if (s_parsed && c_parsed && h_parsed)
+ return -EINVAL;
+
+ type = *next;
+ switch (type) {
+ case 's':
+ case 'S':
+ if (s_parsed || c_parsed || h_parsed)
+ return -EINVAL;
+ s_parsed = 1;
+ next++;
+ break;
+ case 'c':
+ case 'C':
+ if (c_parsed || h_parsed)
+ return -EINVAL;
+ c_parsed = 1;
+ next++;
+ break;
+ case 'h':
+ case 'H':
+ if (h_parsed)
+ return -EINVAL;
+ h_parsed = 1;
+ next++;
+ break;
+ default:
+			/* If it starts with a digit, it must be only the core id. */
+ if (!isdigit(*next) || s_parsed || c_parsed || h_parsed)
+ return -EINVAL;
+
+ type = 'C';
+ }
+
+ for (num_len = 0; *next != '\0'; next++, num_len++) {
+			/* reserve room for the terminating '\0' appended below */
+			if (num_len == RTE_DIM(num) - 1)
+				return -EINVAL;
+
+ if (!isdigit(*next))
+ break;
+
+ num[num_len] = *next;
+ }
+
+ if (num_len == 0 && type != 'h' && type != 'H')
+ return -EINVAL;
+
+ if (num_len != 0 && (type == 'h' || type == 'H'))
+ return -EINVAL;
+
+ num[num_len] = '\0';
+ val = strtol(num, NULL, 10);
+
+ h = 0;
+ switch (type) {
+ case 's':
+ case 'S':
+ s = val;
+ break;
+ case 'c':
+ case 'C':
+ c = val;
+ break;
+ case 'h':
+ case 'H':
+ h = 1;
+ break;
+ }
+ }
+
+ *socket = s;
+ *core = c;
+ *ht = h;
+ return 0;
+}
+
+static uint32_t
+get_hex_val(char c)
+{
+ switch (c) {
+ case '0': case '1': case '2': case '3': case '4': case '5':
+ case '6': case '7': case '8': case '9':
+ return c - '0';
+ case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
+ return c - 'A' + 10;
+ case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
+ return c - 'a' + 10;
+ default:
+ return 0;
+ }
+}
+
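+/*
+ * Convert a hexadecimal string (e.g. "0123abcd") into bytes. On input, *size
+ * is the capacity of dst; on success it is updated to the number of bytes
+ * written.
+ */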
+int
+parse_hex_string(char *src, uint8_t *dst, uint32_t *size)
+{
+ char *c;
+ uint32_t len, i;
+
+ /* Check input parameters */
+ if ((src == NULL) ||
+ (dst == NULL) ||
+ (size == NULL) ||
+ (*size == 0))
+ return -1;
+
+ len = strlen(src);
+ if (((len & 3) != 0) ||
+ (len > (*size) * 2))
+ return -1;
+ *size = len / 2;
+
+ for (c = src; *c != 0; c++) {
+ if ((((*c) >= '0') && ((*c) <= '9')) ||
+ (((*c) >= 'A') && ((*c) <= 'F')) ||
+ (((*c) >= 'a') && ((*c) <= 'f')))
+ continue;
+
+ return -1;
+ }
+
+ /* Convert chars to bytes */
+ for (i = 0; i < *size; i++)
+ dst[i] = get_hex_val(src[2 * i]) * 16 +
+ get_hex_val(src[2 * i + 1]);
+
+ return 0;
+}
+
+static size_t
+skip_digits(const char *src)
+{
+ size_t i;
+
+ for (i = 0; isdigit(src[i]); i++);
+
+ return i;
+}
+
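+/*
+ * Check that a name is the given prefix followed by num dot-separated numeric
+ * indices, e.g. "SWQ5" matches prefix "SWQ" with num = 1 and "RXQ0.0" matches
+ * prefix "RXQ" with num = 2.
+ */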
+static int
+validate_name(const char *name, const char *prefix, int num)
+{
+ size_t i, j;
+
+ for (i = 0; (name[i] != '\0') && (prefix[i] != '\0'); i++) {
+ if (name[i] != prefix[i])
+ return -1;
+ }
+
+ if (prefix[i] != '\0')
+ return -1;
+
+ if (!num) {
+ if (name[i] != '\0')
+ return -1;
+ else
+ return 0;
+ }
+
+ if (num == 2) {
+ j = skip_digits(&name[i]);
+ i += j;
+ if ((j == 0) || (name[i] != '.'))
+ return -1;
+ i++;
+ }
+
+ if (num == 1) {
+ j = skip_digits(&name[i]);
+ i += j;
+ if ((j == 0) || (name[i] != '\0'))
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+parse_eal(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_eal_params *p = &app->eal_params;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *entry = &entries[i];
+
+ /* coremask */
+ if (strcmp(entry->name, "c") == 0) {
+ PARSE_WARNING_IGNORED(0, section_name, entry->name);
+ continue;
+ }
+
+ /* corelist */
+ if (strcmp(entry->name, "l") == 0) {
+ PARSE_WARNING_IGNORED(0, section_name, entry->name);
+ continue;
+ }
+
+ /* coremap */
+ if (strcmp(entry->name, "lcores") == 0) {
+ PARSE_ERROR_DUPLICATE((p->coremap == NULL),
+ section_name,
+ entry->name);
+ p->coremap = strdup(entry->value);
+ continue;
+ }
+
+ /* master_lcore */
+ if (strcmp(entry->name, "master_lcore") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((p->master_lcore_present == 0),
+ section_name,
+ entry->name);
+ p->master_lcore_present = 1;
+
+ status = parser_read_uint32(&p->master_lcore,
+ entry->value);
+ PARSE_ERROR((status == 0), section_name, entry->name);
+ continue;
+ }
+
+ /* channels */
+ if (strcmp(entry->name, "n") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((p->channels_present == 0),
+ section_name,
+ entry->name);
+ p->channels_present = 1;
+
+ status = parser_read_uint32(&p->channels, entry->value);
+ PARSE_ERROR((status == 0), section_name, entry->name);
+ continue;
+ }
+
+ /* memory */
+ if (strcmp(entry->name, "m") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((p->memory_present == 0),
+ section_name,
+ entry->name);
+ p->memory_present = 1;
+
+ status = parser_read_uint32(&p->memory, entry->value);
+ PARSE_ERROR((status == 0), section_name, entry->name);
+ continue;
+ }
+
+ /* ranks */
+ if (strcmp(entry->name, "r") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((p->ranks_present == 0),
+ section_name,
+ entry->name);
+ p->ranks_present = 1;
+
+ status = parser_read_uint32(&p->ranks, entry->value);
+ PARSE_ERROR((status == 0), section_name, entry->name);
+ continue;
+ }
+
+ /* pci_blacklist */
+ if ((strcmp(entry->name, "pci_blacklist") == 0) ||
+ (strcmp(entry->name, "b") == 0)) {
+ uint32_t i;
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_blacklist[i])
+ continue;
+
+ p->pci_blacklist[i] =
+ strdup(entry->value);
+ PARSE_ERROR_MALLOC(p->pci_blacklist[i]);
+
+ break;
+ }
+
+ PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS),
+ section_name, entry->name,
+ "too many elements");
+ continue;
+ }
+
+ /* pci_whitelist */
+ if ((strcmp(entry->name, "pci_whitelist") == 0) ||
+ (strcmp(entry->name, "w") == 0)) {
+ uint32_t i;
+
+ PARSE_ERROR_MESSAGE((app->port_mask != 0),
+ section_name, entry->name, "entry to be "
+ "generated by the application (port_mask "
+ "not provided)");
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_whitelist[i])
+ continue;
+
+ p->pci_whitelist[i] = strdup(entry->value);
+ PARSE_ERROR_MALLOC(p->pci_whitelist[i]);
+
+ break;
+ }
+
+ PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS),
+ section_name, entry->name,
+ "too many elements");
+ continue;
+ }
+
+ /* vdev */
+ if (strcmp(entry->name, "vdev") == 0) {
+ uint32_t i;
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->vdev[i])
+ continue;
+
+ p->vdev[i] = strdup(entry->value);
+ PARSE_ERROR_MALLOC(p->vdev[i]);
+
+ break;
+ }
+
+ PARSE_ERROR_MESSAGE((i < APP_MAX_LINKS),
+ section_name, entry->name,
+ "too many elements");
+ continue;
+ }
+
+ /* vmware_tsc_map */
+ if (strcmp(entry->name, "vmware_tsc_map") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->vmware_tsc_map_present == 0),
+ section_name,
+ entry->name);
+ p->vmware_tsc_map_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->vmware_tsc_map = val;
+ continue;
+ }
+
+ /* proc_type */
+ if (strcmp(entry->name, "proc_type") == 0) {
+ PARSE_ERROR_DUPLICATE((p->proc_type == NULL),
+ section_name,
+ entry->name);
+ p->proc_type = strdup(entry->value);
+ continue;
+ }
+
+ /* syslog */
+ if (strcmp(entry->name, "syslog") == 0) {
+ PARSE_ERROR_DUPLICATE((p->syslog == NULL),
+ section_name,
+ entry->name);
+ p->syslog = strdup(entry->value);
+ continue;
+ }
+
+ /* log_level */
+ if (strcmp(entry->name, "log_level") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((p->log_level_present == 0),
+ section_name,
+ entry->name);
+ p->log_level_present = 1;
+
+ status = parser_read_uint32(&p->log_level,
+ entry->value);
+ PARSE_ERROR((status == 0), section_name, entry->name);
+ continue;
+ }
+
+ /* version */
+ if (strcmp(entry->name, "v") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->version_present == 0),
+ section_name,
+ entry->name);
+ p->version_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->version = val;
+ continue;
+ }
+
+ /* help */
+ if ((strcmp(entry->name, "help") == 0) ||
+ (strcmp(entry->name, "h") == 0)) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->help_present == 0),
+ section_name,
+ entry->name);
+ p->help_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->help = val;
+ continue;
+ }
+
+ /* no_huge */
+ if (strcmp(entry->name, "no_huge") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->no_huge_present == 0),
+ section_name,
+ entry->name);
+ p->no_huge_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->no_huge = val;
+ continue;
+ }
+
+ /* no_pci */
+ if (strcmp(entry->name, "no_pci") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->no_pci_present == 0),
+ section_name,
+ entry->name);
+ p->no_pci_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->no_pci = val;
+ continue;
+ }
+
+ /* no_hpet */
+ if (strcmp(entry->name, "no_hpet") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->no_hpet_present == 0),
+ section_name,
+ entry->name);
+ p->no_hpet_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->no_hpet = val;
+ continue;
+ }
+
+ /* no_shconf */
+ if (strcmp(entry->name, "no_shconf") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->no_shconf_present == 0),
+ section_name,
+ entry->name);
+ p->no_shconf_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->no_shconf = val;
+ continue;
+ }
+
+ /* add_driver */
+ if (strcmp(entry->name, "d") == 0) {
+ PARSE_ERROR_DUPLICATE((p->add_driver == NULL),
+ section_name,
+ entry->name);
+ p->add_driver = strdup(entry->value);
+ continue;
+ }
+
+ /* socket_mem */
+ if (strcmp(entry->name, "socket_mem") == 0) {
+ PARSE_ERROR_DUPLICATE((p->socket_mem == NULL),
+ section_name,
+ entry->name);
+ p->socket_mem = strdup(entry->value);
+ continue;
+ }
+
+ /* huge_dir */
+ if (strcmp(entry->name, "huge_dir") == 0) {
+ PARSE_ERROR_DUPLICATE((p->huge_dir == NULL),
+ section_name,
+ entry->name);
+ p->huge_dir = strdup(entry->value);
+ continue;
+ }
+
+ /* file_prefix */
+ if (strcmp(entry->name, "file_prefix") == 0) {
+ PARSE_ERROR_DUPLICATE((p->file_prefix == NULL),
+ section_name,
+ entry->name);
+ p->file_prefix = strdup(entry->value);
+ continue;
+ }
+
+ /* base_virtaddr */
+ if (strcmp(entry->name, "base_virtaddr") == 0) {
+ PARSE_ERROR_DUPLICATE((p->base_virtaddr == NULL),
+ section_name,
+ entry->name);
+ p->base_virtaddr = strdup(entry->value);
+ continue;
+ }
+
+ /* create_uio_dev */
+ if (strcmp(entry->name, "create_uio_dev") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->create_uio_dev_present == 0),
+ section_name,
+ entry->name);
+ p->create_uio_dev_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->create_uio_dev = val;
+ continue;
+ }
+
+ /* vfio_intr */
+ if (strcmp(entry->name, "vfio_intr") == 0) {
+ PARSE_ERROR_DUPLICATE((p->vfio_intr == NULL),
+ section_name,
+ entry->name);
+ p->vfio_intr = strdup(entry->value);
+ continue;
+ }
+
+ /* xen_dom0 */
+ if (strcmp(entry->name, "xen_dom0") == 0) {
+ int val;
+
+ PARSE_ERROR_DUPLICATE((p->xen_dom0_present == 0),
+ section_name,
+ entry->name);
+ p->xen_dom0_present = 1;
+
+ val = parser_read_arg_bool(entry->value);
+ PARSE_ERROR((val >= 0), section_name, entry->name);
+ p->xen_dom0 = val;
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, entry->name);
+ }
+
+ free(entries);
+}
+
+static int
+parse_pipeline_pcap_source(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *file_name, const char *cp_size)
+{
+ const char *next = NULL;
+ char *end;
+ uint32_t i;
+ int parse_file = 0;
+
+ if (file_name && !cp_size) {
+ next = file_name;
+ parse_file = 1; /* parse file path */
+ } else if (cp_size && !file_name) {
+ next = cp_size;
+ parse_file = 0; /* parse copy size */
+ } else
+ return -EINVAL;
+
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+
+ if (p->n_pktq_in == 0)
+ return -EINVAL;
+
+ i = 0;
+ while (*next != '\0') {
+ uint32_t id;
+
+ if (i >= p->n_pktq_in)
+ return -EINVAL;
+
+ id = p->pktq_in[i].id;
+
+ end = strchr(next, ' ');
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+
+ if (parse_file) {
+ app->source_params[id].file_name = strdup(name);
+ if (app->source_params[id].file_name == NULL)
+ return -ENOMEM;
+ } else {
+ if (parser_read_uint32(
+ &app->source_params[id].n_bytes_per_pkt,
+ name) != 0) {
+ if (app->source_params[id].
+ file_name != NULL)
+ free(app->source_params[id].
+ file_name);
+ return -EINVAL;
+ }
+ }
+
+ i++;
+
+ if (i == p->n_pktq_in)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int
+parse_pipeline_pcap_sink(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *file_name, const char *n_pkts_to_dump)
+{
+ const char *next = NULL;
+ char *end;
+ uint32_t i;
+ int parse_file = 0;
+
+ if (file_name && !n_pkts_to_dump) {
+ next = file_name;
+ parse_file = 1; /* parse file path */
+ } else if (n_pkts_to_dump && !file_name) {
+ next = n_pkts_to_dump;
+		parse_file = 0; /* parse number of packets to dump */
+ } else
+ return -EINVAL;
+
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+
+ if (p->n_pktq_out == 0)
+ return -EINVAL;
+
+ i = 0;
+ while (*next != '\0') {
+ uint32_t id;
+
+ if (i >= p->n_pktq_out)
+ return -EINVAL;
+
+ id = p->pktq_out[i].id;
+
+ end = strchr(next, ' ');
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+
+ if (parse_file) {
+ app->sink_params[id].file_name = strdup(name);
+ if (app->sink_params[id].file_name == NULL)
+ return -ENOMEM;
+ } else {
+ if (parser_read_uint32(
+ &app->sink_params[id].n_pkts_to_dump,
+ name) != 0) {
+ if (app->sink_params[id].file_name !=
+ NULL)
+ free(app->sink_params[id].
+ file_name);
+ return -EINVAL;
+ }
+ }
+
+ i++;
+
+ if (i == p->n_pktq_out)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+static int
+parse_pipeline_pktq_in(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *value)
+{
+ const char *next = value;
+ char *end;
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+
+ while (*next != '\0') {
+ enum app_pktq_in_type type;
+ int id;
+ char *end_space;
+ char *end_tab;
+
+ next = skip_white_spaces(next);
+ if (!next)
+ break;
+
+ end_space = strchr(next, ' ');
+		end_tab = strchr(next, '\t');
+
+ if (end_space && (!end_tab))
+ end = end_space;
+ else if ((!end_space) && end_tab)
+ end = end_tab;
+ else if (end_space && end_tab)
+ end = RTE_MIN(end_space, end_tab);
+ else
+ end = NULL;
+
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+
+ if (validate_name(name, "RXQ", 2) == 0) {
+ type = APP_PKTQ_IN_HWQ;
+ id = APP_PARAM_ADD(app->hwq_in_params, name);
+ } else if (validate_name(name, "SWQ", 1) == 0) {
+ type = APP_PKTQ_IN_SWQ;
+ id = APP_PARAM_ADD(app->swq_params, name);
+ } else if (validate_name(name, "TM", 1) == 0) {
+ type = APP_PKTQ_IN_TM;
+ id = APP_PARAM_ADD(app->tm_params, name);
+ } else if (validate_name(name, "SOURCE", 1) == 0) {
+ type = APP_PKTQ_IN_SOURCE;
+ id = APP_PARAM_ADD(app->source_params, name);
+ } else
+ return -EINVAL;
+
+ if (id < 0)
+ return id;
+
+ p->pktq_in[p->n_pktq_in].type = type;
+ p->pktq_in[p->n_pktq_in].id = (uint32_t) id;
+ p->n_pktq_in++;
+ }
+
+ return 0;
+}
+
+static int
+parse_pipeline_pktq_out(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *value)
+{
+ const char *next = value;
+ char *end;
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+
+ while (*next != '\0') {
+ enum app_pktq_out_type type;
+ int id;
+ char *end_space;
+ char *end_tab;
+
+ next = skip_white_spaces(next);
+ if (!next)
+ break;
+
+ end_space = strchr(next, ' ');
+		end_tab = strchr(next, '\t');
+
+ if (end_space && (!end_tab))
+ end = end_space;
+ else if ((!end_space) && end_tab)
+ end = end_tab;
+ else if (end_space && end_tab)
+ end = RTE_MIN(end_space, end_tab);
+ else
+ end = NULL;
+
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+ if (validate_name(name, "TXQ", 2) == 0) {
+ type = APP_PKTQ_OUT_HWQ;
+ id = APP_PARAM_ADD(app->hwq_out_params, name);
+ } else if (validate_name(name, "SWQ", 1) == 0) {
+ type = APP_PKTQ_OUT_SWQ;
+ id = APP_PARAM_ADD(app->swq_params, name);
+ } else if (validate_name(name, "TM", 1) == 0) {
+ type = APP_PKTQ_OUT_TM;
+ id = APP_PARAM_ADD(app->tm_params, name);
+ } else if (validate_name(name, "SINK", 1) == 0) {
+ type = APP_PKTQ_OUT_SINK;
+ id = APP_PARAM_ADD(app->sink_params, name);
+ } else
+ return -EINVAL;
+
+ if (id < 0)
+ return id;
+
+ p->pktq_out[p->n_pktq_out].type = type;
+ p->pktq_out[p->n_pktq_out].id = id;
+ p->n_pktq_out++;
+ }
+
+ return 0;
+}
+
+static int
+parse_pipeline_msgq_in(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *value)
+{
+ const char *next = value;
+ char *end;
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+ ssize_t idx;
+
+ while (*next != '\0') {
+ char *end_space;
+ char *end_tab;
+
+ next = skip_white_spaces(next);
+ if (!next)
+ break;
+
+ end_space = strchr(next, ' ');
+		end_tab = strchr(next, '\t');
+
+ if (end_space && (!end_tab))
+ end = end_space;
+ else if ((!end_space) && end_tab)
+ end = end_tab;
+ else if (end_space && end_tab)
+ end = RTE_MIN(end_space, end_tab);
+ else
+ end = NULL;
+
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+
+ if (validate_name(name, "MSGQ", 1) != 0)
+ return -EINVAL;
+
+ idx = APP_PARAM_ADD(app->msgq_params, name);
+ if (idx < 0)
+ return idx;
+
+ p->msgq_in[p->n_msgq_in] = idx;
+ p->n_msgq_in++;
+ }
+
+ return 0;
+}
+
+static int
+parse_pipeline_msgq_out(struct app_params *app,
+ struct app_pipeline_params *p,
+ const char *value)
+{
+ const char *next = value;
+ char *end;
+ char name[APP_PARAM_NAME_SIZE];
+ size_t name_len;
+ ssize_t idx;
+
+ while (*next != '\0') {
+ char *end_space;
+ char *end_tab;
+
+ next = skip_white_spaces(next);
+ if (!next)
+ break;
+
+ end_space = strchr(next, ' ');
+ end_tab = strchr(next, ' ');
+		end_tab = strchr(next, '\t');
+ if (end_space && (!end_tab))
+ end = end_space;
+ else if ((!end_space) && end_tab)
+ end = end_tab;
+ else if (end_space && end_tab)
+ end = RTE_MIN(end_space, end_tab);
+ else
+ end = NULL;
+
+ if (!end)
+ name_len = strlen(next);
+ else
+ name_len = end - next;
+
+ if (name_len == 0 || name_len == sizeof(name))
+ return -EINVAL;
+
+ strncpy(name, next, name_len);
+ name[name_len] = '\0';
+ next += name_len;
+ if (*next != '\0')
+ next++;
+
+ if (validate_name(name, "MSGQ", 1) != 0)
+ return -EINVAL;
+
+ idx = APP_PARAM_ADD(app->msgq_params, name);
+ if (idx < 0)
+ return idx;
+
+ p->msgq_out[p->n_msgq_out] = idx;
+ p->n_msgq_out++;
+ }
+
+ return 0;
+}
+
+static void
+parse_pipeline(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ char name[CFG_NAME_LEN];
+ struct app_pipeline_params *param;
+ struct rte_cfgfile_entry *entries;
+ ssize_t param_idx;
+ int n_entries, i;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->pipeline_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->pipeline_params, section_name);
+
+ param = &app->pipeline_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "type") == 0) {
+ int w_size = snprintf(param->type, RTE_DIM(param->type),
+ "%s", ent->value);
+
+ PARSE_ERROR(((w_size > 0) &&
+ (w_size < (int)RTE_DIM(param->type))),
+ section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "core") == 0) {
+ int status = parse_pipeline_core(
+ &param->socket_id, &param->core_id,
+ &param->hyper_th_id, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pktq_in") == 0) {
+ int status = parse_pipeline_pktq_in(app, param,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pktq_out") == 0) {
+ int status = parse_pipeline_pktq_out(app, param,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "msgq_in") == 0) {
+ int status = parse_pipeline_msgq_in(app, param,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "msgq_out") == 0) {
+ int status = parse_pipeline_msgq_out(app, param,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "timer_period") == 0) {
+ int status = parser_read_uint32(
+ &param->timer_period,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_file_rd") == 0) {
+ int status;
+
+#ifndef RTE_PORT_PCAP
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+#endif
+
+ status = parse_pipeline_pcap_source(app,
+ param, ent->value, NULL);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_bytes_rd_per_pkt") == 0) {
+ int status;
+
+#ifndef RTE_PORT_PCAP
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+#endif
+
+ status = parse_pipeline_pcap_source(app,
+ param, NULL, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_file_wr") == 0) {
+ int status;
+
+#ifndef RTE_PORT_PCAP
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+#endif
+
+ status = parse_pipeline_pcap_sink(app, param,
+ ent->value, NULL);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_n_pkt_wr") == 0) {
+ int status;
+
+#ifndef RTE_PORT_PCAP
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+#endif
+
+ status = parse_pipeline_pcap_sink(app, param,
+ NULL, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* pipeline type specific items */
+ APP_CHECK((param->n_args < APP_MAX_PIPELINE_ARGS),
+ "Parse error in section \"%s\": too many "
+ "pipeline specified parameters", section_name);
+
+ param->args_name[param->n_args] = strdup(ent->name);
+ param->args_value[param->n_args] = strdup(ent->value);
+
+ APP_CHECK((param->args_name[param->n_args] != NULL) &&
+ (param->args_value[param->n_args] != NULL),
+ "Parse error: no free memory");
+
+ param->n_args++;
+ }
+
+ param->parsed = 1;
+
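+	/* Each pipeline implicitly gets a request/response message queue pair of
+	 * its own, plus request/response queues for the core it runs on. */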
+ snprintf(name, sizeof(name), "MSGQ-REQ-%s", section_name);
+ param_idx = APP_PARAM_ADD(app->msgq_params, name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name);
+ app->msgq_params[param_idx].cpu_socket_id = param->socket_id;
+ param->msgq_in[param->n_msgq_in++] = param_idx;
+
+ snprintf(name, sizeof(name), "MSGQ-RSP-%s", section_name);
+ param_idx = APP_PARAM_ADD(app->msgq_params, name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name);
+ app->msgq_params[param_idx].cpu_socket_id = param->socket_id;
+ param->msgq_out[param->n_msgq_out++] = param_idx;
+
+ snprintf(name, sizeof(name), "MSGQ-REQ-CORE-s%" PRIu32 "c%" PRIu32 "%s",
+ param->socket_id,
+ param->core_id,
+ (param->hyper_th_id) ? "h" : "");
+ param_idx = APP_PARAM_ADD(app->msgq_params, name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name);
+ app->msgq_params[param_idx].cpu_socket_id = param->socket_id;
+
+ snprintf(name, sizeof(name), "MSGQ-RSP-CORE-s%" PRIu32 "c%" PRIu32 "%s",
+ param->socket_id,
+ param->core_id,
+ (param->hyper_th_id) ? "h" : "");
+ param_idx = APP_PARAM_ADD(app->msgq_params, name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, name);
+ app->msgq_params[param_idx].cpu_socket_id = param->socket_id;
+
+ free(entries);
+}
+
+static void
+parse_mempool(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_mempool_params *param;
+ struct rte_cfgfile_entry *entries;
+ ssize_t param_idx;
+ int n_entries, i;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->mempool_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->mempool_params, section_name);
+
+ param = &app->mempool_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "buffer_size") == 0) {
+ int status = parser_read_uint32(
+ &param->buffer_size, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pool_size") == 0) {
+ int status = parser_read_uint32(
+ &param->pool_size, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "cache_size") == 0) {
+ int status = parser_read_uint32(
+ &param->cache_size, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "cpu") == 0) {
+ int status = parser_read_uint32(
+ &param->cpu_socket_id, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_link(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_link_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ int pci_bdf_present = 0;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->link_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->link_params, section_name);
+
+ param = &app->link_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "promisc") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->promisc = status;
+ continue;
+ }
+
+ if (strcmp(ent->name, "arp_q") == 0) {
+ int status = parser_read_uint32(&param->arp_q,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "tcp_syn_q") == 0) {
+ int status = parser_read_uint32(
+ &param->tcp_syn_q, ent->value);
+
+ PARSE_ERROR((status == 0), section_name, ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "ip_local_q") == 0) {
+ int status = parser_read_uint32(
+ &param->ip_local_q, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+
+ if (strcmp(ent->name, "tcp_local_q") == 0) {
+ int status = parser_read_uint32(
+ &param->tcp_local_q, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "udp_local_q") == 0) {
+ int status = parser_read_uint32(
+ &param->udp_local_q, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "sctp_local_q") == 0) {
+ int status = parser_read_uint32(
+ &param->sctp_local_q, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pci_bdf") == 0) {
+ PARSE_ERROR_DUPLICATE((pci_bdf_present == 0),
+ section_name, ent->name);
+
+ snprintf(param->pci_bdf, APP_LINK_PCI_BDF_SIZE,
+ "%s", ent->value);
+ pci_bdf_present = 1;
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ /* Check for mandatory fields */
+ if (app->port_mask)
+ PARSE_ERROR_MESSAGE((pci_bdf_present == 0),
+ section_name, "pci_bdf",
+ "entry not allowed (port_mask is provided)");
+ else
+ PARSE_ERROR_MESSAGE((pci_bdf_present),
+ section_name, "pci_bdf",
+ "this entry is mandatory (port_mask is not "
+ "provided)");
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_rxq(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_hwq_in_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->hwq_in_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->hwq_in_params, section_name);
+
+ param = &app->hwq_in_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "mempool") == 0) {
+ int status = validate_name(ent->value,
+ "MEMPOOL", 1);
+ ssize_t idx;
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ idx = APP_PARAM_ADD(app->mempool_params,
+ ent->value);
+ PARSER_PARAM_ADD_CHECK(idx, app->mempool_params,
+ section_name);
+ param->mempool_id = idx;
+ continue;
+ }
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst") == 0) {
+ int status = parser_read_uint32(&param->burst,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_txq(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_hwq_out_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->hwq_out_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->hwq_out_params, section_name);
+
+ param = &app->hwq_out_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst") == 0) {
+ int status = parser_read_uint32(&param->burst,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "dropless") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->dropless = status;
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_swq(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_swq_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ uint32_t mtu_present = 0;
+ uint32_t metadata_size_present = 0;
+ uint32_t mempool_direct_present = 0;
+ uint32_t mempool_indirect_present = 0;
+
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->swq_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->swq_params, section_name);
+
+ param = &app->swq_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst_read") == 0) {
+ int status = parser_read_uint32(
+ &param->burst_read, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst_write") == 0) {
+ int status = parser_read_uint32(
+ &param->burst_write, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "dropless") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->dropless = status;
+ continue;
+ }
+
+ if (strcmp(ent->name, "n_retries") == 0) {
+ int status = parser_read_uint64(&param->n_retries,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "cpu") == 0) {
+ int status = parser_read_uint32(
+ &param->cpu_socket_id, ent->value);
+
+ PARSE_ERROR((status == 0), section_name, ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "ipv4_frag") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+
+ param->ipv4_frag = status;
+ if (param->mtu == 0)
+ param->mtu = 1500;
+
+ continue;
+ }
+
+ if (strcmp(ent->name, "ipv6_frag") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->ipv6_frag = status;
+ if (param->mtu == 0)
+ param->mtu = 1320;
+ continue;
+ }
+
+ if (strcmp(ent->name, "ipv4_ras") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->ipv4_ras = status;
+ continue;
+ }
+
+ if (strcmp(ent->name, "ipv6_ras") == 0) {
+ int status = parser_read_arg_bool(ent->value);
+
+ PARSE_ERROR((status != -EINVAL), section_name,
+ ent->name);
+ param->ipv6_ras = status;
+ continue;
+ }
+
+ if (strcmp(ent->name, "mtu") == 0) {
+ int status = parser_read_uint32(&param->mtu,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ mtu_present = 1;
+ continue;
+ }
+
+ if (strcmp(ent->name, "metadata_size") == 0) {
+ int status = parser_read_uint32(
+ &param->metadata_size, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ metadata_size_present = 1;
+ continue;
+ }
+
+ if (strcmp(ent->name, "mempool_direct") == 0) {
+ int status = validate_name(ent->value,
+ "MEMPOOL", 1);
+ ssize_t idx;
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+
+ idx = APP_PARAM_ADD(app->mempool_params,
+ ent->value);
+ PARSER_PARAM_ADD_CHECK(idx, app->mempool_params,
+ section_name);
+ param->mempool_direct_id = idx;
+ mempool_direct_present = 1;
+ continue;
+ }
+
+ if (strcmp(ent->name, "mempool_indirect") == 0) {
+ int status = validate_name(ent->value,
+ "MEMPOOL", 1);
+ ssize_t idx;
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ idx = APP_PARAM_ADD(app->mempool_params,
+ ent->value);
+ PARSER_PARAM_ADD_CHECK(idx, app->mempool_params,
+ section_name);
+ param->mempool_indirect_id = idx;
+ mempool_indirect_present = 1;
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ APP_CHECK(((mtu_present == 0) ||
+ (param->ipv4_frag == 1) || (param->ipv6_frag == 1)),
+ "Parse error in section \"%s\": IPv4/IPv6 fragmentation "
+ "is off, therefore entry \"mtu\" is not allowed",
+ section_name);
+
+ APP_CHECK(((metadata_size_present == 0) ||
+ (param->ipv4_frag == 1) || (param->ipv6_frag == 1)),
+ "Parse error in section \"%s\": IPv4/IPv6 fragmentation "
+ "is off, therefore entry \"metadata_size\" is "
+ "not allowed", section_name);
+
+ APP_CHECK(((mempool_direct_present == 0) ||
+ (param->ipv4_frag == 1) || (param->ipv6_frag == 1)),
+ "Parse error in section \"%s\": IPv4/IPv6 fragmentation "
+ "is off, therefore entry \"mempool_direct\" is "
+ "not allowed", section_name);
+
+ APP_CHECK(((mempool_indirect_present == 0) ||
+ (param->ipv4_frag == 1) || (param->ipv6_frag == 1)),
+ "Parse error in section \"%s\": IPv4/IPv6 fragmentation "
+ "is off, therefore entry \"mempool_indirect\" is "
+ "not allowed", section_name);
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_tm(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_tm_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->tm_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->tm_params, section_name);
+
+ param = &app->tm_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "cfg") == 0) {
+ param->file_name = strdup(ent->value);
+ PARSE_ERROR_MALLOC(param->file_name != NULL);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst_read") == 0) {
+ int status = parser_read_uint32(
+ &param->burst_read, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst_write") == 0) {
+ int status = parser_read_uint32(
+ &param->burst_write, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_source(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_source_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+ uint32_t pcap_file_present = 0;
+ uint32_t pcap_size_present = 0;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->source_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->source_params, section_name);
+
+ param = &app->source_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "mempool") == 0) {
+ int status = validate_name(ent->value,
+ "MEMPOOL", 1);
+ ssize_t idx;
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ idx = APP_PARAM_ADD(app->mempool_params,
+ ent->value);
+ PARSER_PARAM_ADD_CHECK(idx, app->mempool_params,
+ section_name);
+ param->mempool_id = idx;
+ continue;
+ }
+
+ if (strcmp(ent->name, "burst") == 0) {
+ int status = parser_read_uint32(&param->burst,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_file_rd")) {
+ PARSE_ERROR_DUPLICATE((pcap_file_present == 0),
+ section_name, ent->name);
+
+ param->file_name = strdup(ent->value);
+
+ PARSE_ERROR_MALLOC(param->file_name != NULL);
+ pcap_file_present = 1;
+
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_bytes_rd_per_pkt") == 0) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((pcap_size_present == 0),
+ section_name, ent->name);
+
+ status = parser_read_uint32(
+ &param->n_bytes_per_pkt, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ pcap_size_present = 1;
+
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_sink(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_pktq_sink_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+ uint32_t pcap_file_present = 0;
+ uint32_t pcap_n_pkt_present = 0;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->sink_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->sink_params, section_name);
+
+ param = &app->sink_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "pcap_file_wr")) {
+ PARSE_ERROR_DUPLICATE((pcap_file_present == 0),
+ section_name, ent->name);
+
+ param->file_name = strdup(ent->value);
+
+ PARSE_ERROR_MALLOC((param->file_name != NULL));
+
+ continue;
+ }
+
+ if (strcmp(ent->name, "pcap_n_pkt_wr")) {
+ int status;
+
+ PARSE_ERROR_DUPLICATE((pcap_n_pkt_present == 0),
+ section_name, ent->name);
+
+ status = parser_read_uint32(
+ &param->n_pkts_to_dump, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_msgq_req_pipeline(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_msgq_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->msgq_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name);
+
+ param = &app->msgq_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+ free(entries);
+}
+
+static void
+parse_msgq_rsp_pipeline(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_msgq_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->msgq_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name);
+
+ param = &app->msgq_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+static void
+parse_msgq(struct app_params *app,
+ const char *section_name,
+ struct rte_cfgfile *cfg)
+{
+ struct app_msgq_params *param;
+ struct rte_cfgfile_entry *entries;
+ int n_entries, i;
+ ssize_t param_idx;
+
+ n_entries = rte_cfgfile_section_num_entries(cfg, section_name);
+ PARSE_ERROR_SECTION_NO_ENTRIES((n_entries > 0), section_name);
+
+ entries = malloc(n_entries * sizeof(struct rte_cfgfile_entry));
+ PARSE_ERROR_MALLOC(entries != NULL);
+
+ rte_cfgfile_section_entries(cfg, section_name, entries, n_entries);
+
+ param_idx = APP_PARAM_ADD(app->msgq_params, section_name);
+ PARSER_PARAM_ADD_CHECK(param_idx, app->msgq_params, section_name);
+
+ param = &app->msgq_params[param_idx];
+
+ for (i = 0; i < n_entries; i++) {
+ struct rte_cfgfile_entry *ent = &entries[i];
+
+ if (strcmp(ent->name, "size") == 0) {
+ int status = parser_read_uint32(&param->size,
+ ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ if (strcmp(ent->name, "cpu") == 0) {
+ int status = parser_read_uint32(
+ &param->cpu_socket_id, ent->value);
+
+ PARSE_ERROR((status == 0), section_name,
+ ent->name);
+ continue;
+ }
+
+ /* unrecognized */
+ PARSE_ERROR_INVALID(0, section_name, ent->name);
+ }
+
+ param->parsed = 1;
+
+ free(entries);
+}
+
+typedef void (*config_section_load)(struct app_params *p,
+ const char *section_name,
+ struct rte_cfgfile *cfg);
+
+struct config_section {
+ const char prefix[CFG_NAME_LEN];
+ int numbers;
+ config_section_load load;
+};
+
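+/*
+ * Config file scheme: every section name must start with one of the
+ * prefixes below, followed by "numbers" numeric indices (e.g. "RXQ0.0"
+ * carries two, "SWQ3" carries one, "EAL" carries none). The matching
+ * load handler is invoked for each section of that type.
+ */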
+static const struct config_section cfg_file_scheme[] = {
+ {"EAL", 0, parse_eal},
+ {"PIPELINE", 1, parse_pipeline},
+ {"MEMPOOL", 1, parse_mempool},
+ {"LINK", 1, parse_link},
+ {"RXQ", 2, parse_rxq},
+ {"TXQ", 2, parse_txq},
+ {"SWQ", 1, parse_swq},
+ {"TM", 1, parse_tm},
+ {"SOURCE", 1, parse_source},
+ {"SINK", 1, parse_sink},
+ {"MSGQ-REQ-PIPELINE", 1, parse_msgq_req_pipeline},
+ {"MSGQ-RSP-PIPELINE", 1, parse_msgq_rsp_pipeline},
+ {"MSGQ", 1, parse_msgq},
+};
+
+static void
+create_implicit_mempools(struct app_params *app)
+{
+ ssize_t idx;
+
+ idx = APP_PARAM_ADD(app->mempool_params, "MEMPOOL0");
+ PARSER_PARAM_ADD_CHECK(idx, app->mempool_params, "start-up");
+}
+
+static void
+create_implicit_links_from_port_mask(struct app_params *app,
+ uint64_t port_mask)
+{
+ uint32_t pmd_id, link_id;
+
+ link_id = 0;
+ for (pmd_id = 0; pmd_id < RTE_MAX_ETHPORTS; pmd_id++) {
+ char name[APP_PARAM_NAME_SIZE];
+ ssize_t idx;
+
+ if ((port_mask & (1LLU << pmd_id)) == 0)
+ continue;
+
+ snprintf(name, sizeof(name), "LINK%" PRIu32, link_id);
+ idx = APP_PARAM_ADD(app->link_params, name);
+ PARSER_PARAM_ADD_CHECK(idx, app->link_params, name);
+
+ app->link_params[idx].pmd_id = pmd_id;
+ link_id++;
+ }
+}
+
+static void
+assign_link_pmd_id_from_pci_bdf(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *link = &app->link_params[i];
+
+ link->pmd_id = i;
+ }
+}
+
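+/*
+ * Top-level configuration parser: create the implicit MEMPOOL0 and, when
+ * a port mask is given, one LINK per set bit; then load the config file,
+ * dispatch each section to its handler according to cfg_file_scheme,
+ * count the resulting objects, write the resolved configuration to the
+ * output file and finally load any traffic manager (TM) profile files.
+ */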
+int
+app_config_parse(struct app_params *app, const char *file_name)
+{
+ struct rte_cfgfile *cfg;
+ char **section_names;
+ int i, j, sect_count;
+
+ /* Implicit mempools */
+ create_implicit_mempools(app);
+
+ /* Port mask */
+ if (app->port_mask)
+ create_implicit_links_from_port_mask(app, app->port_mask);
+
+ /* Load application configuration file */
+ cfg = rte_cfgfile_load(file_name, 0);
+ APP_CHECK((cfg != NULL), "Parse error: Unable to load config "
+ "file %s", file_name);
+
+ sect_count = rte_cfgfile_num_sections(cfg, NULL, 0);
+ APP_CHECK((sect_count > 0), "Parse error: number of sections "
+ "in file \"%s\" return %d", file_name,
+ sect_count);
+
+ section_names = malloc(sect_count * sizeof(char *));
+ PARSE_ERROR_MALLOC(section_names != NULL);
+
+ for (i = 0; i < sect_count; i++)
+ section_names[i] = malloc(CFG_NAME_LEN);
+
+ rte_cfgfile_sections(cfg, section_names, sect_count);
+
+ for (i = 0; i < sect_count; i++) {
+ const struct config_section *sch_s;
+ int len, cfg_name_len;
+
+ cfg_name_len = strlen(section_names[i]);
+
+ /* Find section type */
+ for (j = 0; j < (int)RTE_DIM(cfg_file_scheme); j++) {
+ sch_s = &cfg_file_scheme[j];
+ len = strlen(sch_s->prefix);
+
+ if (cfg_name_len < len)
+ continue;
+
+ /* After section name we expect only '\0' or digit or
+ * digit dot digit, so protect against false matching,
+ * for example: "ABC" should match section name
+ * "ABC0.0", but it should not match section_name
+ * "ABCDEF".
+ */
+ if ((section_names[i][len] != '\0') &&
+ !isdigit(section_names[i][len]))
+ continue;
+
+ if (strncmp(sch_s->prefix, section_names[i], len) == 0)
+ break;
+ }
+
+ APP_CHECK(j < (int)RTE_DIM(cfg_file_scheme),
+ "Parse error: unknown section %s",
+ section_names[i]);
+
+ APP_CHECK(validate_name(section_names[i],
+ sch_s->prefix,
+ sch_s->numbers) == 0,
+ "Parse error: invalid section name \"%s\"",
+ section_names[i]);
+
+ sch_s->load(app, section_names[i], cfg);
+ }
+
+ for (i = 0; i < sect_count; i++)
+ free(section_names[i]);
+
+ free(section_names);
+
+ rte_cfgfile_close(cfg);
+
+ APP_PARAM_COUNT(app->mempool_params, app->n_mempools);
+ APP_PARAM_COUNT(app->link_params, app->n_links);
+ APP_PARAM_COUNT(app->hwq_in_params, app->n_pktq_hwq_in);
+ APP_PARAM_COUNT(app->hwq_out_params, app->n_pktq_hwq_out);
+ APP_PARAM_COUNT(app->swq_params, app->n_pktq_swq);
+ APP_PARAM_COUNT(app->tm_params, app->n_pktq_tm);
+ APP_PARAM_COUNT(app->source_params, app->n_pktq_source);
+ APP_PARAM_COUNT(app->sink_params, app->n_pktq_sink);
+ APP_PARAM_COUNT(app->msgq_params, app->n_msgq);
+ APP_PARAM_COUNT(app->pipeline_params, app->n_pipelines);
+
+#ifdef RTE_PORT_PCAP
+ for (i = 0; i < (int)app->n_pktq_source; i++) {
+ struct app_pktq_source_params *p = &app->source_params[i];
+
+ APP_CHECK((p->file_name), "Parse error: missing "
+ "mandatory field \"pcap_file_rd\" for \"%s\"",
+ p->name);
+ }
+#else
+ for (i = 0; i < (int)app->n_pktq_source; i++) {
+ struct app_pktq_source_params *p = &app->source_params[i];
+
+ APP_CHECK((!p->file_name), "Parse error: invalid field "
+ "\"pcap_file_rd\" for \"%s\"", p->name);
+ }
+#endif
+
+ if (app->port_mask == 0)
+ assign_link_pmd_id_from_pci_bdf(app);
+
+ /* Save configuration to output file */
+ app_config_save(app, app->output_file);
+
+ /* Load TM configuration files */
+ app_config_parse_tm(app);
+
+ return 0;
+}
+
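+/*
+ * The save_* helpers below re-emit the fully resolved configuration,
+ * one section per object, using the same [SECTION] / "entry = value"
+ * format that the parser accepts.
+ */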
+static void
+save_eal_params(struct app_params *app, FILE *f)
+{
+ struct app_eal_params *p = &app->eal_params;
+ uint32_t i;
+
+ fprintf(f, "[EAL]\n");
+
+ if (p->coremap)
+ fprintf(f, "%s = %s\n", "lcores", p->coremap);
+
+ if (p->master_lcore_present)
+ fprintf(f, "%s = %" PRIu32 "\n",
+ "master_lcore", p->master_lcore);
+
+ fprintf(f, "%s = %" PRIu32 "\n", "n", p->channels);
+
+ if (p->memory_present)
+ fprintf(f, "%s = %" PRIu32 "\n", "m", p->memory);
+
+ if (p->ranks_present)
+ fprintf(f, "%s = %" PRIu32 "\n", "r", p->ranks);
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_blacklist[i] == NULL)
+ break;
+
+ fprintf(f, "%s = %s\n", "pci_blacklist",
+ p->pci_blacklist[i]);
+ }
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_whitelist[i] == NULL)
+ break;
+
+ fprintf(f, "%s = %s\n", "pci_whitelist",
+ p->pci_whitelist[i]);
+ }
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->vdev[i] == NULL)
+ break;
+
+ fprintf(f, "%s = %s\n", "vdev",
+ p->vdev[i]);
+ }
+
+ if (p->vmware_tsc_map_present)
+ fprintf(f, "%s = %s\n", "vmware_tsc_map",
+ (p->vmware_tsc_map) ? "yes" : "no");
+
+ if (p->proc_type)
+ fprintf(f, "%s = %s\n", "proc_type", p->proc_type);
+
+ if (p->syslog)
+ fprintf(f, "%s = %s\n", "syslog", p->syslog);
+
+ if (p->log_level_present)
+ fprintf(f, "%s = %" PRIu32 "\n", "log_level", p->log_level);
+
+ if (p->version_present)
+ fprintf(f, "%s = %s\n", "v", (p->version) ? "yes" : "no");
+
+ if (p->help_present)
+ fprintf(f, "%s = %s\n", "help", (p->help) ? "yes" : "no");
+
+ if (p->no_huge_present)
+ fprintf(f, "%s = %s\n", "no_huge", (p->no_huge) ? "yes" : "no");
+
+ if (p->no_pci_present)
+ fprintf(f, "%s = %s\n", "no_pci", (p->no_pci) ? "yes" : "no");
+
+ if (p->no_hpet_present)
+ fprintf(f, "%s = %s\n", "no_hpet", (p->no_hpet) ? "yes" : "no");
+
+ if (p->no_shconf_present)
+ fprintf(f, "%s = %s\n", "no_shconf",
+ (p->no_shconf) ? "yes" : "no");
+
+ if (p->add_driver)
+ fprintf(f, "%s = %s\n", "d", p->add_driver);
+
+ if (p->socket_mem)
+ fprintf(f, "%s = %s\n", "socket_mem", p->socket_mem);
+
+ if (p->huge_dir)
+ fprintf(f, "%s = %s\n", "huge_dir", p->huge_dir);
+
+ if (p->file_prefix)
+ fprintf(f, "%s = %s\n", "file_prefix", p->file_prefix);
+
+ if (p->base_virtaddr)
+ fprintf(f, "%s = %s\n", "base_virtaddr", p->base_virtaddr);
+
+ if (p->create_uio_dev_present)
+ fprintf(f, "%s = %s\n", "create_uio_dev",
+ (p->create_uio_dev) ? "yes" : "no");
+
+ if (p->vfio_intr)
+ fprintf(f, "%s = %s\n", "vfio_intr", p->vfio_intr);
+
+ if (p->xen_dom0_present)
+ fprintf(f, "%s = %s\n", "xen_dom0",
+ (p->xen_dom0) ? "yes" : "no");
+
+ fputc('\n', f);
+}
+
+static void
+save_mempool_params(struct app_params *app, FILE *f)
+{
+ struct app_mempool_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->mempool_params);
+ for (i = 0; i < count; i++) {
+ p = &app->mempool_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %" PRIu32 "\n", "buffer_size", p->buffer_size);
+ fprintf(f, "%s = %" PRIu32 "\n", "pool_size", p->pool_size);
+ fprintf(f, "%s = %" PRIu32 "\n", "cache_size", p->cache_size);
+ fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id);
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_links_params(struct app_params *app, FILE *f)
+{
+ struct app_link_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->link_params);
+ for (i = 0; i < count; i++) {
+ p = &app->link_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "; %s = %" PRIu32 "\n", "pmd_id", p->pmd_id);
+ fprintf(f, "%s = %s\n", "promisc", p->promisc ? "yes" : "no");
+ fprintf(f, "%s = %" PRIu32 "\n", "arp_q", p->arp_q);
+ fprintf(f, "%s = %" PRIu32 "\n", "tcp_syn_q",
+ p->tcp_syn_q);
+ fprintf(f, "%s = %" PRIu32 "\n", "ip_local_q", p->ip_local_q);
+ fprintf(f, "%s = %" PRIu32 "\n", "tcp_local_q", p->tcp_local_q);
+ fprintf(f, "%s = %" PRIu32 "\n", "udp_local_q", p->udp_local_q);
+ fprintf(f, "%s = %" PRIu32 "\n", "sctp_local_q",
+ p->sctp_local_q);
+
+ if (strlen(p->pci_bdf))
+ fprintf(f, "%s = %s\n", "pci_bdf", p->pci_bdf);
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_rxq_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_hwq_in_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->hwq_in_params);
+ for (i = 0; i < count; i++) {
+ p = &app->hwq_in_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %s\n",
+ "mempool",
+ app->mempool_params[p->mempool_id].name);
+ fprintf(f, "%s = %" PRIu32 "\n", "size", p->size);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst);
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_txq_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_hwq_out_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->hwq_out_params);
+ for (i = 0; i < count; i++) {
+ p = &app->hwq_out_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %" PRIu32 "\n", "size", p->size);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst);
+ fprintf(f, "%s = %s\n",
+ "dropless",
+ p->dropless ? "yes" : "no");
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_swq_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_swq_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->swq_params);
+ for (i = 0; i < count; i++) {
+ p = &app->swq_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %" PRIu32 "\n", "size", p->size);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst_read", p->burst_read);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst_write", p->burst_write);
+ fprintf(f, "%s = %s\n", "dropless", p->dropless ? "yes" : "no");
+ fprintf(f, "%s = %" PRIu64 "\n", "n_retries", p->n_retries);
+ fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id);
+ fprintf(f, "%s = %s\n", "ipv4_frag", p->ipv4_frag ? "yes" : "no");
+ fprintf(f, "%s = %s\n", "ipv6_frag", p->ipv6_frag ? "yes" : "no");
+ fprintf(f, "%s = %s\n", "ipv4_ras", p->ipv4_ras ? "yes" : "no");
+ fprintf(f, "%s = %s\n", "ipv6_ras", p->ipv6_ras ? "yes" : "no");
+ if ((p->ipv4_frag == 1) || (p->ipv6_frag == 1)) {
+ fprintf(f, "%s = %" PRIu32 "\n", "mtu", p->mtu);
+ fprintf(f, "%s = %" PRIu32 "\n", "metadata_size", p->metadata_size);
+ fprintf(f, "%s = %s\n",
+ "mempool_direct",
+ app->mempool_params[p->mempool_direct_id].name);
+ fprintf(f, "%s = %s\n",
+ "mempool_indirect",
+ app->mempool_params[p->mempool_indirect_id].name);
+ }
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_tm_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_tm_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->tm_params);
+ for (i = 0; i < count; i++) {
+ p = &app->tm_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %s\n", "cfg", p->file_name);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst_read", p->burst_read);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst_write", p->burst_write);
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_source_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_source_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->source_params);
+ for (i = 0; i < count; i++) {
+ p = &app->source_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %s\n",
+ "mempool",
+ app->mempool_params[p->mempool_id].name);
+ fprintf(f, "%s = %" PRIu32 "\n", "burst", p->burst);
+ fprintf(f, "%s = %s\n", "pcap_file_rd", p->file_name);
+ fprintf(f, "%s = %" PRIu32 "\n", "pcap_bytes_rd_per_pkt",
+ p->n_bytes_per_pkt);
+ fputc('\n', f);
+ }
+}
+
+static void
+save_sink_params(struct app_params *app, FILE *f)
+{
+ struct app_pktq_sink_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->sink_params);
+ for (i = 0; i < count; i++) {
+ p = &app->sink_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %s\n", "pcap_file_wr", p->file_name);
+ fprintf(f, "%s = %" PRIu32 "\n",
+ "pcap_n_pkt_wr", p->n_pkts_to_dump);
+ fputc('\n', f);
+ }
+}
+
+static void
+save_msgq_params(struct app_params *app, FILE *f)
+{
+ struct app_msgq_params *p;
+ size_t i, count;
+
+ count = RTE_DIM(app->msgq_params);
+ for (i = 0; i < count; i++) {
+ p = &app->msgq_params[i];
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ fprintf(f, "[%s]\n", p->name);
+ fprintf(f, "%s = %" PRIu32 "\n", "size", p->size);
+ fprintf(f, "%s = %" PRIu32 "\n", "cpu", p->cpu_socket_id);
+
+ fputc('\n', f);
+ }
+}
+
+static void
+save_pipeline_params(struct app_params *app, FILE *f)
+{
+ size_t i, count;
+
+ count = RTE_DIM(app->pipeline_params);
+ for (i = 0; i < count; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+
+ if (!APP_PARAM_VALID(p))
+ continue;
+
+ /* section name */
+ fprintf(f, "[%s]\n", p->name);
+
+ /* type */
+ fprintf(f, "type = %s\n", p->type);
+
+ /* core */
+ fprintf(f, "core = s%" PRIu32 "c%" PRIu32 "%s\n",
+ p->socket_id,
+ p->core_id,
+ (p->hyper_th_id) ? "h" : "");
+
+ /* pktq_in */
+ if (p->n_pktq_in) {
+ uint32_t j;
+
+ fprintf(f, "pktq_in =");
+ for (j = 0; j < p->n_pktq_in; j++) {
+ struct app_pktq_in_params *pp = &p->pktq_in[j];
+ char *name;
+
+ switch (pp->type) {
+ case APP_PKTQ_IN_HWQ:
+ name = app->hwq_in_params[pp->id].name;
+ break;
+ case APP_PKTQ_IN_SWQ:
+ name = app->swq_params[pp->id].name;
+ break;
+ case APP_PKTQ_IN_TM:
+ name = app->tm_params[pp->id].name;
+ break;
+ case APP_PKTQ_IN_SOURCE:
+ name = app->source_params[pp->id].name;
+ break;
+ default:
+ APP_CHECK(0, "System error "
+ "occurred while saving "
+ "parameter to file");
+ }
+
+ fprintf(f, " %s", name);
+ }
+ fprintf(f, "\n");
+ }
+
+ /* pktq_out */
+ if (p->n_pktq_out) {
+ uint32_t j;
+
+ fprintf(f, "pktq_out =");
+ for (j = 0; j < p->n_pktq_out; j++) {
+ struct app_pktq_out_params *pp =
+ &p->pktq_out[j];
+ char *name;
+
+ switch (pp->type) {
+ case APP_PKTQ_OUT_HWQ:
+ name = app->hwq_out_params[pp->id].name;
+ break;
+ case APP_PKTQ_OUT_SWQ:
+ name = app->swq_params[pp->id].name;
+ break;
+ case APP_PKTQ_OUT_TM:
+ name = app->tm_params[pp->id].name;
+ break;
+ case APP_PKTQ_OUT_SINK:
+ name = app->sink_params[pp->id].name;
+ break;
+ default:
+ APP_CHECK(0, "System error "
+ "occurred while saving "
+ "parameter to file");
+ }
+
+ fprintf(f, " %s", name);
+ }
+ fprintf(f, "\n");
+ }
+
+ /* msgq_in */
+ if (p->n_msgq_in) {
+ uint32_t j;
+
+ fprintf(f, "msgq_in =");
+ for (j = 0; j < p->n_msgq_in; j++) {
+ uint32_t id = p->msgq_in[j];
+ char *name = app->msgq_params[id].name;
+
+ fprintf(f, " %s", name);
+ }
+ fprintf(f, "\n");
+ }
+
+ /* msgq_out */
+ if (p->n_msgq_out) {
+ uint32_t j;
+
+ fprintf(f, "msgq_out =");
+ for (j = 0; j < p->n_msgq_out; j++) {
+ uint32_t id = p->msgq_out[j];
+ char *name = app->msgq_params[id].name;
+
+ fprintf(f, " %s", name);
+ }
+ fprintf(f, "\n");
+ }
+
+ /* timer_period */
+ fprintf(f, "timer_period = %" PRIu32 "\n", p->timer_period);
+
+ /* args */
+ if (p->n_args) {
+ uint32_t j;
+
+ for (j = 0; j < p->n_args; j++)
+ fprintf(f, "%s = %s\n", p->args_name[j],
+ p->args_value[j]);
+ }
+
+ fprintf(f, "\n");
+ }
+}
+
+void
+app_config_save(struct app_params *app, const char *file_name)
+{
+ FILE *file;
+ char *name, *dir_name;
+ int status;
+
+ name = strdup(file_name);
+ dir_name = dirname(name);
+ status = access(dir_name, W_OK);
+ APP_CHECK((status == 0),
+ "Error: need write access privilege to directory "
+ "\"%s\" to save configuration\n", dir_name);
+
+ file = fopen(file_name, "w");
+ APP_CHECK((file != NULL),
+ "Error: failed to save configuration to file \"%s\"",
+ file_name);
+
+ save_eal_params(app, file);
+ save_pipeline_params(app, file);
+ save_mempool_params(app, file);
+ save_links_params(app, file);
+ save_rxq_params(app, file);
+ save_txq_params(app, file);
+ save_swq_params(app, file);
+ save_tm_params(app, file);
+ save_source_params(app, file);
+ save_sink_params(app, file);
+ save_msgq_params(app, file);
+
+ fclose(file);
+ free(name);
+}
+
+int
+app_config_init(struct app_params *app)
+{
+ size_t i;
+
+ memcpy(app, &app_params_default, sizeof(struct app_params));
+
+ for (i = 0; i < RTE_DIM(app->mempool_params); i++)
+ memcpy(&app->mempool_params[i],
+ &mempool_params_default,
+ sizeof(struct app_mempool_params));
+
+ for (i = 0; i < RTE_DIM(app->link_params); i++)
+ memcpy(&app->link_params[i],
+ &link_params_default,
+ sizeof(struct app_link_params));
+
+ for (i = 0; i < RTE_DIM(app->hwq_in_params); i++)
+ memcpy(&app->hwq_in_params[i],
+ &default_hwq_in_params,
+ sizeof(default_hwq_in_params));
+
+ for (i = 0; i < RTE_DIM(app->hwq_out_params); i++)
+ memcpy(&app->hwq_out_params[i],
+ &default_hwq_out_params,
+ sizeof(default_hwq_out_params));
+
+ for (i = 0; i < RTE_DIM(app->swq_params); i++)
+ memcpy(&app->swq_params[i],
+ &default_swq_params,
+ sizeof(default_swq_params));
+
+ for (i = 0; i < RTE_DIM(app->tm_params); i++)
+ memcpy(&app->tm_params[i],
+ &default_tm_params,
+ sizeof(default_tm_params));
+
+ for (i = 0; i < RTE_DIM(app->source_params); i++)
+ memcpy(&app->source_params[i],
+ &default_source_params,
+ sizeof(default_source_params));
+
+ for (i = 0; i < RTE_DIM(app->sink_params); i++)
+ memcpy(&app->sink_params[i],
+ &default_sink_params,
+ sizeof(default_sink_params));
+
+ for (i = 0; i < RTE_DIM(app->msgq_params); i++)
+ memcpy(&app->msgq_params[i],
+ &default_msgq_params,
+ sizeof(default_msgq_params));
+
+ for (i = 0; i < RTE_DIM(app->pipeline_params); i++)
+ memcpy(&app->pipeline_params[i],
+ &default_pipeline_params,
+ sizeof(default_pipeline_params));
+
+ return 0;
+}
+
+static char *
+filenamedup(const char *filename, const char *suffix)
+{
+ char *s = malloc(strlen(filename) + strlen(suffix) + 1);
+
+ if (!s)
+ return NULL;
+
+ sprintf(s, "%s%s", filename, suffix);
+ return s;
+}
+
+int
+app_config_args(struct app_params *app, int argc, char **argv)
+{
+ const char *optname;
+ int opt, option_index;
+ int f_present, s_present, p_present, l_present;
+ int preproc_present, preproc_params_present;
+ int scaned = 0;
+
+ static struct option lgopts[] = {
+ { "preproc", 1, 0, 0 },
+ { "preproc-args", 1, 0, 0 },
+ { NULL, 0, 0, 0 }
+ };
+
+ /* Copy application name */
+ strncpy(app->app_name, argv[0], APP_APPNAME_SIZE - 1);
+
+ f_present = 0;
+ s_present = 0;
+ p_present = 0;
+ l_present = 0;
+ preproc_present = 0;
+ preproc_params_present = 0;
+
+ while ((opt = getopt_long(argc, argv, "f:s:p:l:", lgopts,
+ &option_index)) != EOF)
+ switch (opt) {
+ case 'f':
+ if (f_present)
+ rte_panic("Error: Config file is provided "
+ "more than once\n");
+ f_present = 1;
+
+ if (!strlen(optarg))
+ rte_panic("Error: Config file name is null\n");
+
+ app->config_file = strdup(optarg);
+ if (app->config_file == NULL)
+ rte_panic("Error: Memory allocation failure\n");
+
+ break;
+
+ case 's':
+ if (s_present)
+ rte_panic("Error: Script file is provided "
+ "more than once\n");
+ s_present = 1;
+
+ if (!strlen(optarg))
+ rte_panic("Error: Script file name is null\n");
+
+ app->script_file = strdup(optarg);
+ if (app->script_file == NULL)
+ rte_panic("Error: Memory allocation failure\n");
+
+ break;
+
+ case 'p':
+ if (p_present)
+ rte_panic("Error: PORT_MASK is provided "
+ "more than once\n");
+ p_present = 1;
+
+ if ((sscanf(optarg, "%" SCNx64 "%n", &app->port_mask,
+ &scaned) != 1) ||
+ ((size_t) scaned != strlen(optarg)))
+ rte_panic("Error: PORT_MASK is not "
+ "a hexadecimal integer\n");
+
+ if (app->port_mask == 0)
+ rte_panic("Error: PORT_MASK is null\n");
+
+ break;
+
+ case 'l':
+ if (l_present)
+ rte_panic("Error: LOG_LEVEL is provided "
+ "more than once\n");
+ l_present = 1;
+
+ if ((sscanf(optarg, "%" SCNu32 "%n", &app->log_level,
+ &scaned) != 1) ||
+ ((size_t) scaned != strlen(optarg)) ||
+ (app->log_level >= APP_LOG_LEVELS))
+ rte_panic("Error: LOG_LEVEL invalid value\n");
+
+ break;
+
+ case 0:
+ optname = lgopts[option_index].name;
+
+ if (strcmp(optname, "preproc") == 0) {
+ if (preproc_present)
+ rte_panic("Error: Preprocessor argument "
+ "is provided more than once\n");
+ preproc_present = 1;
+
+ app->preproc = strdup(optarg);
+ break;
+ }
+
+ if (strcmp(optname, "preproc-args") == 0) {
+ if (preproc_params_present)
+ rte_panic("Error: Preprocessor args "
+ "are provided more than once\n");
+ preproc_params_present = 1;
+
+ app->preproc_args = strdup(optarg);
+ break;
+ }
+
+ app_print_usage(argv[0]);
+ break;
+
+ default:
+ app_print_usage(argv[0]);
+ }
+
+ optind = 0; /* reset getopt lib */
+
+ /* Check dependencies between args */
+ if (preproc_params_present && (preproc_present == 0))
+ rte_panic("Error: Preprocessor args specified while "
+ "preprocessor is not defined\n");
+
+ app->parser_file = preproc_present ?
+ filenamedup(app->config_file, ".preproc") :
+ strdup(app->config_file);
+ app->output_file = filenamedup(app->config_file, ".out");
+
+ return 0;
+}
+
+int
+app_config_preproc(struct app_params *app)
+{
+ char buffer[256];
+ int status;
+
+ if (app->preproc == NULL)
+ return 0;
+
+ status = access(app->config_file, F_OK | R_OK);
+ APP_CHECK((status == 0), "Error: Unable to open file %s",
+ app->config_file);
+
+ snprintf(buffer, sizeof(buffer), "%s %s %s > %s",
+ app->preproc,
+ app->preproc_args ? app->preproc_args : "",
+ app->config_file,
+ app->parser_file);
+
+ status = system(buffer);
+ APP_CHECK((WIFEXITED(status) && (WEXITSTATUS(status) == 0)),
+ "Error occurred while pre-processing file \"%s\"\n",
+ app->config_file);
+
+ return status;
+}
diff --git a/examples/ip_pipeline/config_parse_tm.c b/examples/ip_pipeline/config_parse_tm.c
new file mode 100644
index 00000000..e75eed71
--- /dev/null
+++ b/examples/ip_pipeline/config_parse_tm.c
@@ -0,0 +1,448 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <ctype.h>
+#include <getopt.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <string.h>
+#include <libgen.h>
+#include <unistd.h>
+
+#include <rte_errno.h>
+#include <rte_cfgfile.h>
+#include <rte_string_fns.h>
+
+#include "app.h"
+
+static int
+tm_cfgfile_load_sched_port(
+ struct rte_cfgfile *file,
+ struct rte_sched_port_params *port_params)
+{
+ const char *entry;
+ int j;
+
+ entry = rte_cfgfile_get_entry(file, "port", "frame overhead");
+ if (entry)
+ port_params->frame_overhead = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, "port", "mtu");
+ if (entry)
+ port_params->mtu = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ "port",
+ "number of subports per port");
+ if (entry)
+ port_params->n_subports_per_port = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ "port",
+ "number of pipes per subport");
+ if (entry)
+ port_params->n_pipes_per_subport = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, "port", "queue sizes");
+ if (entry) {
+ char *next;
+
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ port_params->qsize[j] = (uint16_t)
+ strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+#ifdef RTE_SCHED_RED
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ char str[32];
+
+ /* Parse WRED min thresholds */
+ snprintf(str, sizeof(str), "tc %" PRId32 " wred min", j);
+ entry = rte_cfgfile_get_entry(file, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].min_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED max thresholds */
+ snprintf(str, sizeof(str), "tc %" PRId32 " wred max", j);
+ entry = rte_cfgfile_get_entry(file, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].max_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED inverse mark probabilities */
+ snprintf(str, sizeof(str), "tc %" PRId32 " wred inv prob", j);
+ entry = rte_cfgfile_get_entry(file, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].maxp_inv
+ = (uint8_t)strtol(entry, &next, 10);
+
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED EWMA filter weights */
+ snprintf(str, sizeof(str), "tc %" PRId32 " wred weight", j);
+ entry = rte_cfgfile_get_entry(file, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].wq_log2
+ = (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ }
+#endif /* RTE_SCHED_RED */
+
+ return 0;
+}
+
+static int
+tm_cfgfile_load_sched_pipe(
+ struct rte_cfgfile *file,
+ struct rte_sched_port_params *port_params,
+ struct rte_sched_pipe_params *pipe_params)
+{
+ int i, j;
+ char *next;
+ const char *entry;
+ int profiles;
+
+ profiles = rte_cfgfile_num_sections(file,
+ "pipe profile", sizeof("pipe profile") - 1);
+ port_params->n_pipe_profiles = profiles;
+
+ for (j = 0; j < profiles; j++) {
+ char pipe_name[32];
+
+ snprintf(pipe_name, sizeof(pipe_name),
+ "pipe profile %" PRId32, j);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tb rate");
+ if (entry)
+ pipe_params[j].tb_rate = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tb size");
+ if (entry)
+ pipe_params[j].tb_size = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc period");
+ if (entry)
+ pipe_params[j].tc_period = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 0 rate");
+ if (entry)
+ pipe_params[j].tc_rate[0] = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 1 rate");
+ if (entry)
+ pipe_params[j].tc_rate[1] = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 2 rate");
+ if (entry)
+ pipe_params[j].tc_rate[2] = (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 3 rate");
+ if (entry)
+ pipe_params[j].tc_rate[3] = (uint32_t) atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ entry = rte_cfgfile_get_entry(file, pipe_name,
+ "tc 3 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight = (uint8_t)atoi(entry);
+#endif
+
+ entry = rte_cfgfile_get_entry(file,
+ pipe_name,
+ "tc 0 wrr weights");
+ if (entry)
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] =
+ (uint8_t) strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 1 wrr weights");
+ if (entry)
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] =
+ (uint8_t) strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 2 wrr weights");
+ if (entry)
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] =
+ (uint8_t) strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+
+ entry = rte_cfgfile_get_entry(file, pipe_name, "tc 3 wrr weights");
+ if (entry)
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] =
+ (uint8_t) strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ return 0;
+}
+
+static int
+tm_cfgfile_load_sched_subport(
+ struct rte_cfgfile *file,
+ struct rte_sched_subport_params *subport_params,
+ int *pipe_to_profile)
+{
+ const char *entry;
+ int i, j, k;
+
+ for (i = 0; i < APP_MAX_SCHED_SUBPORTS; i++) {
+ char sec_name[CFG_NAME_LEN];
+
+ snprintf(sec_name, sizeof(sec_name),
+ "subport %" PRId32, i);
+
+ if (rte_cfgfile_has_section(file, sec_name)) {
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tb rate");
+ if (entry)
+ subport_params[i].tb_rate =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tb size");
+ if (entry)
+ subport_params[i].tb_size =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tc period");
+ if (entry)
+ subport_params[i].tc_period =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tc 0 rate");
+ if (entry)
+ subport_params[i].tc_rate[0] =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tc 1 rate");
+ if (entry)
+ subport_params[i].tc_rate[1] =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tc 2 rate");
+ if (entry)
+ subport_params[i].tc_rate[2] =
+ (uint32_t) atoi(entry);
+
+ entry = rte_cfgfile_get_entry(file,
+ sec_name,
+ "tc 3 rate");
+ if (entry)
+ subport_params[i].tc_rate[3] =
+ (uint32_t) atoi(entry);
+
+ int n_entries = rte_cfgfile_section_num_entries(file,
+ sec_name);
+ struct rte_cfgfile_entry entries[n_entries];
+
+ rte_cfgfile_section_entries(file,
+ sec_name,
+ entries,
+ n_entries);
+
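+ /*
+ * Entries named "pipe <first>-<last>" (or "pipe <n>") assign
+ * the pipe profile given by the entry value to that range of
+ * pipes within the current subport.
+ */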
+ for (j = 0; j < n_entries; j++)
+ if (strncmp("pipe",
+ entries[j].name,
+ sizeof("pipe") - 1) == 0) {
+ int profile;
+ char *tokens[2] = {NULL, NULL};
+ int n_tokens;
+ int begin, end;
+ char name[CFG_NAME_LEN + 1];
+
+ profile = atoi(entries[j].value);
+ strncpy(name,
+ entries[j].name,
+ sizeof(name));
+ n_tokens = rte_strsplit(
+ &name[sizeof("pipe")],
+ strnlen(name, CFG_NAME_LEN),
+ tokens, 2, '-');
+
+ begin = atoi(tokens[0]);
+ if (n_tokens == 2)
+ end = atoi(tokens[1]);
+ else
+ end = begin;
+
+ if ((end >= APP_MAX_SCHED_PIPES) ||
+ (begin > end))
+ return -1;
+
+ for (k = begin; k <= end; k++) {
+ char profile_name[CFG_NAME_LEN];
+
+ snprintf(profile_name,
+ sizeof(profile_name),
+ "pipe profile %" PRId32,
+ profile);
+ if (rte_cfgfile_has_section(file, profile_name))
+ pipe_to_profile[i * APP_MAX_SCHED_PIPES + k] = profile;
+ else
+ rte_exit(EXIT_FAILURE,
+ "Wrong pipe profile %s\n",
+ entries[j].value);
+ }
+ }
+ }
+ }
+
+ return 0;
+}
+
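+/*
+ * Load a traffic manager profile: reset the scheduler port, subport and
+ * pipe parameters, then fill them in from the "port", "subport ..." and
+ * "pipe profile ..." sections of the file referenced by the [TM]
+ * section's "cfg" entry.
+ */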
+static int
+tm_cfgfile_load(struct app_pktq_tm_params *tm)
+{
+ struct rte_cfgfile *file;
+ uint32_t i;
+
+ memset(tm->sched_subport_params, 0, sizeof(tm->sched_subport_params));
+ memset(tm->sched_pipe_profiles, 0, sizeof(tm->sched_pipe_profiles));
+ memset(&tm->sched_port_params, 0, sizeof(tm->sched_port_params));
+ for (i = 0; i < APP_MAX_SCHED_SUBPORTS * APP_MAX_SCHED_PIPES; i++)
+ tm->sched_pipe_to_profile[i] = -1;
+
+ tm->sched_port_params.pipe_profiles = &tm->sched_pipe_profiles[0];
+
+ if (tm->file_name[0] == '\0')
+ return -1;
+
+ file = rte_cfgfile_load(tm->file_name, 0);
+ if (file == NULL)
+ return -1;
+
+ tm_cfgfile_load_sched_port(file,
+ &tm->sched_port_params);
+ tm_cfgfile_load_sched_subport(file,
+ tm->sched_subport_params,
+ tm->sched_pipe_to_profile);
+ tm_cfgfile_load_sched_pipe(file,
+ &tm->sched_port_params,
+ tm->sched_pipe_profiles);
+
+ rte_cfgfile_close(file);
+ return 0;
+}
+
+int
+app_config_parse_tm(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < RTE_DIM(app->tm_params); i++) {
+ struct app_pktq_tm_params *p = &app->tm_params[i];
+ int status;
+
+ if (!APP_PARAM_VALID(p))
+ break;
+
+ status = tm_cfgfile_load(p);
+ APP_CHECK(status == 0,
+ "Parse error for %s configuration file \"%s\"\n",
+ p->name,
+ p->file_name);
+ }
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/cpu_core_map.c b/examples/ip_pipeline/cpu_core_map.c
new file mode 100644
index 00000000..cb088b1c
--- /dev/null
+++ b/examples/ip_pipeline/cpu_core_map.c
@@ -0,0 +1,492 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#include "cpu_core_map.h"
+
+struct cpu_core_map {
+ uint32_t n_max_sockets;
+ uint32_t n_max_cores_per_socket;
+ uint32_t n_max_ht_per_core;
+ uint32_t n_sockets;
+ uint32_t n_cores_per_socket;
+ uint32_t n_ht_per_core;
+ int map[0];
+};
+
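+/*
+ * The map is a flattened 3-D array indexed by (socket, physical core,
+ * hyper-thread); each slot stores the corresponding lcore id, or -1 if
+ * that position is unused.
+ */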
+static inline uint32_t
+cpu_core_map_pos(struct cpu_core_map *map,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t ht_id)
+{
+ return (socket_id * map->n_max_cores_per_socket + core_id) *
+ map->n_max_ht_per_core + ht_id;
+}
+
+static int
+cpu_core_map_compute_eal(struct cpu_core_map *map);
+
+static int
+cpu_core_map_compute_linux(struct cpu_core_map *map);
+
+static int
+cpu_core_map_compute_and_check(struct cpu_core_map *map);
+
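+/*
+ * Build the core map either from the EAL lcore configuration (when
+ * eal_initialized is non-zero) or from the Linux sysfs CPU topology,
+ * then verify that every socket exposes the same number of cores and
+ * every core the same number of hyper-threads.
+ */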
+struct cpu_core_map *
+cpu_core_map_init(uint32_t n_max_sockets,
+ uint32_t n_max_cores_per_socket,
+ uint32_t n_max_ht_per_core,
+ uint32_t eal_initialized)
+{
+ uint32_t map_size, map_mem_size, i;
+ struct cpu_core_map *map;
+ int status;
+
+ /* Check input arguments */
+ if ((n_max_sockets == 0) ||
+ (n_max_cores_per_socket == 0) ||
+ (n_max_ht_per_core == 0))
+ return NULL;
+
+ /* Memory allocation */
+ map_size = n_max_sockets * n_max_cores_per_socket * n_max_ht_per_core;
+ map_mem_size = sizeof(struct cpu_core_map) + map_size * sizeof(int);
+ map = (struct cpu_core_map *) malloc(map_mem_size);
+ if (map == NULL)
+ return NULL;
+
+ /* Initialization */
+ map->n_max_sockets = n_max_sockets;
+ map->n_max_cores_per_socket = n_max_cores_per_socket;
+ map->n_max_ht_per_core = n_max_ht_per_core;
+ map->n_sockets = 0;
+ map->n_cores_per_socket = 0;
+ map->n_ht_per_core = 0;
+
+ for (i = 0; i < map_size; i++)
+ map->map[i] = -1;
+
+ status = (eal_initialized) ?
+ cpu_core_map_compute_eal(map) :
+ cpu_core_map_compute_linux(map);
+
+ if (status) {
+ free(map);
+ return NULL;
+ }
+
+ status = cpu_core_map_compute_and_check(map);
+ if (status) {
+ free(map);
+ return NULL;
+ }
+
+ return map;
+}
+
+int
+cpu_core_map_compute_eal(struct cpu_core_map *map)
+{
+ uint32_t socket_id, core_id, ht_id;
+
+ /* Compute map */
+ for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) {
+ uint32_t n_detected, core_id_contig;
+ int lcore_id;
+
+ n_detected = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ struct lcore_config *p = &lcore_config[lcore_id];
+
+ if ((p->detected) && (p->socket_id == socket_id))
+ n_detected++;
+ }
+
+ core_id_contig = 0;
+
+ for (core_id = 0; n_detected ; core_id++) {
+ ht_id = 0;
+
+ for (lcore_id = 0;
+ lcore_id < RTE_MAX_LCORE;
+ lcore_id++) {
+ struct lcore_config *p =
+ &lcore_config[lcore_id];
+
+ if ((p->detected) &&
+ (p->socket_id == socket_id) &&
+ (p->core_id == core_id)) {
+ uint32_t pos = cpu_core_map_pos(map,
+ socket_id,
+ core_id_contig,
+ ht_id);
+
+ map->map[pos] = lcore_id;
+ ht_id++;
+ n_detected--;
+ }
+ }
+
+ if (ht_id) {
+ core_id_contig++;
+ if (core_id_contig ==
+ map->n_max_cores_per_socket)
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int
+cpu_core_map_compute_and_check(struct cpu_core_map *map)
+{
+ uint32_t socket_id, core_id, ht_id;
+
+ /* Compute n_ht_per_core, n_cores_per_socket, n_sockets */
+ for (ht_id = 0; ht_id < map->n_max_ht_per_core; ht_id++) {
+ if (map->map[ht_id] == -1)
+ break;
+
+ map->n_ht_per_core++;
+ }
+
+ if (map->n_ht_per_core == 0)
+ return -1;
+
+ for (core_id = 0; core_id < map->n_max_cores_per_socket; core_id++) {
+ uint32_t pos = core_id * map->n_max_ht_per_core;
+
+ if (map->map[pos] == -1)
+ break;
+
+ map->n_cores_per_socket++;
+ }
+
+ if (map->n_cores_per_socket == 0)
+ return -1;
+
+ for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) {
+ uint32_t pos = socket_id * map->n_max_cores_per_socket *
+ map->n_max_ht_per_core;
+
+ if (map->map[pos] == -1)
+ break;
+
+ map->n_sockets++;
+ }
+
+ if (map->n_sockets == 0)
+ return -1;
+
+ /* Check that each socket has exactly the same number of cores
+ and that each core has exactly the same number of hyper-threads */
+ for (socket_id = 0; socket_id < map->n_sockets; socket_id++) {
+ for (core_id = 0; core_id < map->n_cores_per_socket; core_id++)
+ for (ht_id = 0;
+ ht_id < map->n_max_ht_per_core;
+ ht_id++) {
+ uint32_t pos = (socket_id *
+ map->n_max_cores_per_socket + core_id) *
+ map->n_max_ht_per_core + ht_id;
+
+ if (((ht_id < map->n_ht_per_core) &&
+ (map->map[pos] == -1)) ||
+ ((ht_id >= map->n_ht_per_core) &&
+ (map->map[pos] != -1)))
+ return -1;
+ }
+
+ for ( ; core_id < map->n_max_cores_per_socket; core_id++)
+ for (ht_id = 0;
+ ht_id < map->n_max_ht_per_core;
+ ht_id++) {
+ uint32_t pos = cpu_core_map_pos(map,
+ socket_id,
+ core_id,
+ ht_id);
+
+ if (map->map[pos] != -1)
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+#define FILE_LINUX_CPU_N_LCORES \
+ "/sys/devices/system/cpu/present"
+
+static int
+cpu_core_map_get_n_lcores_linux(void)
+{
+ char buffer[64], *string;
+ FILE *fd;
+
+ fd = fopen(FILE_LINUX_CPU_N_LCORES, "r");
+ if (fd == NULL)
+ return -1;
+
+ if (fgets(buffer, sizeof(buffer), fd) == NULL) {
+ fclose(fd);
+ return -1;
+ }
+
+ fclose(fd);
+
+ string = strchr(buffer, '-');
+ if (string == NULL)
+ return -1;
+
+ return atoi(++string) + 1;
+}
+
+#define FILE_LINUX_CPU_CORE_ID \
+ "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/core_id"
+
+static int
+cpu_core_map_get_core_id_linux(int lcore_id)
+{
+ char buffer[64];
+ FILE *fd;
+ int core_id;
+
+ snprintf(buffer, sizeof(buffer), FILE_LINUX_CPU_CORE_ID, lcore_id);
+ fd = fopen(buffer, "r");
+ if (fd == NULL)
+ return -1;
+
+ if (fgets(buffer, sizeof(buffer), fd) == NULL) {
+ fclose(fd);
+ return -1;
+ }
+
+ fclose(fd);
+
+ core_id = atoi(buffer);
+ return core_id;
+}
+
+#define FILE_LINUX_CPU_SOCKET_ID \
+ "/sys/devices/system/cpu/cpu%" PRIu32 "/topology/physical_package_id"
+
+static int
+cpu_core_map_get_socket_id_linux(int lcore_id)
+{
+ char buffer[64];
+ FILE *fd;
+ int socket_id;
+
+ snprintf(buffer, sizeof(buffer), FILE_LINUX_CPU_SOCKET_ID, lcore_id);
+ fd = fopen(buffer, "r");
+ if (fd == NULL)
+ return -1;
+
+ if (fgets(buffer, sizeof(buffer), fd) == NULL) {
+ fclose(fd);
+ return -1;
+ }
+
+ fclose(fd);
+
+ socket_id = atoi(buffer);
+ return socket_id;
+}
+
+int
+cpu_core_map_compute_linux(struct cpu_core_map *map)
+{
+ uint32_t socket_id, core_id, ht_id;
+ int n_lcores;
+
+ n_lcores = cpu_core_map_get_n_lcores_linux();
+ if (n_lcores <= 0)
+ return -1;
+
+ /* Compute map */
+ for (socket_id = 0; socket_id < map->n_max_sockets; socket_id++) {
+ uint32_t n_detected, core_id_contig;
+ int lcore_id;
+
+ n_detected = 0;
+ for (lcore_id = 0; lcore_id < n_lcores; lcore_id++) {
+ int lcore_socket_id =
+ cpu_core_map_get_socket_id_linux(lcore_id);
+
+ if (lcore_socket_id < 0)
+ return -1;
+
+ if (((uint32_t) lcore_socket_id) == socket_id)
+ n_detected++;
+ }
+
+ core_id_contig = 0;
+
+ for (core_id = 0; n_detected ; core_id++) {
+ ht_id = 0;
+
+ for (lcore_id = 0; lcore_id < n_lcores; lcore_id++) {
+ int lcore_socket_id =
+ cpu_core_map_get_socket_id_linux(
+ lcore_id);
+
+ if (lcore_socket_id < 0)
+ return -1;
+
+ int lcore_core_id =
+ cpu_core_map_get_core_id_linux(
+ lcore_id);
+
+ if (lcore_core_id < 0)
+ return -1;
+
+ if (((uint32_t) lcore_socket_id == socket_id) &&
+ ((uint32_t) lcore_core_id == core_id)) {
+ uint32_t pos = cpu_core_map_pos(map,
+ socket_id,
+ core_id_contig,
+ ht_id);
+
+ map->map[pos] = lcore_id;
+ ht_id++;
+ n_detected--;
+ }
+ }
+
+ if (ht_id) {
+ core_id_contig++;
+ if (core_id_contig ==
+ map->n_max_cores_per_socket)
+ return -1;
+ }
+ }
+ }
+
+ return 0;
+}
+
+void
+cpu_core_map_print(struct cpu_core_map *map)
+{
+ uint32_t socket_id, core_id, ht_id;
+
+ if (map == NULL)
+ return;
+
+ for (socket_id = 0; socket_id < map->n_sockets; socket_id++) {
+ printf("Socket %" PRIu32 ":\n", socket_id);
+
+ for (core_id = 0;
+ core_id < map->n_cores_per_socket;
+ core_id++) {
+ printf("[%" PRIu32 "] = [", core_id);
+
+ for (ht_id = 0; ht_id < map->n_ht_per_core; ht_id++) {
+ int lcore_id = cpu_core_map_get_lcore_id(map,
+ socket_id,
+ core_id,
+ ht_id);
+
+ uint32_t core_id_noncontig =
+ cpu_core_map_get_core_id_linux(
+ lcore_id);
+
+ printf(" %" PRId32 " (%" PRIu32 ") ",
+ lcore_id,
+ core_id_noncontig);
+ }
+
+ printf("]\n");
+ }
+ }
+}
+
+uint32_t
+cpu_core_map_get_n_sockets(struct cpu_core_map *map)
+{
+ if (map == NULL)
+ return 0;
+
+ return map->n_sockets;
+}
+
+uint32_t
+cpu_core_map_get_n_cores_per_socket(struct cpu_core_map *map)
+{
+ if (map == NULL)
+ return 0;
+
+ return map->n_cores_per_socket;
+}
+
+uint32_t
+cpu_core_map_get_n_ht_per_core(struct cpu_core_map *map)
+{
+ if (map == NULL)
+ return 0;
+
+ return map->n_ht_per_core;
+}
+
+int
+cpu_core_map_get_lcore_id(struct cpu_core_map *map,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t ht_id)
+{
+ uint32_t pos;
+
+ if ((map == NULL) ||
+ (socket_id >= map->n_sockets) ||
+ (core_id >= map->n_cores_per_socket) ||
+ (ht_id >= map->n_ht_per_core))
+ return -1;
+
+ pos = cpu_core_map_pos(map, socket_id, core_id, ht_id);
+
+ return map->map[pos];
+}
+
+void
+cpu_core_map_free(struct cpu_core_map *map)
+{
+ free(map);
+}
diff --git a/examples/ip_pipeline/cpu_core_map.h b/examples/ip_pipeline/cpu_core_map.h
new file mode 100644
index 00000000..5c2ec729
--- /dev/null
+++ b/examples/ip_pipeline/cpu_core_map.h
@@ -0,0 +1,69 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_CPU_CORE_MAP_H__
+#define __INCLUDE_CPU_CORE_MAP_H__
+
+#include <stdio.h>
+
+#include <rte_lcore.h>
+
+struct cpu_core_map;
+
+struct cpu_core_map *
+cpu_core_map_init(uint32_t n_max_sockets,
+ uint32_t n_max_cores_per_socket,
+ uint32_t n_max_ht_per_core,
+ uint32_t eal_initialized);
+
+uint32_t
+cpu_core_map_get_n_sockets(struct cpu_core_map *map);
+
+uint32_t
+cpu_core_map_get_n_cores_per_socket(struct cpu_core_map *map);
+
+uint32_t
+cpu_core_map_get_n_ht_per_core(struct cpu_core_map *map);
+
+int
+cpu_core_map_get_lcore_id(struct cpu_core_map *map,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t ht_id);
+
+void cpu_core_map_print(struct cpu_core_map *map);
+
+void
+cpu_core_map_free(struct cpu_core_map *map);
+
+#endif
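
Editorial note: taken together, cpu_core_map.c and this header expose a small lifecycle: build the map once, query it to translate a physical (socket, core, hyper-thread) position into an lcore id, and free it at shutdown. The sketch below is illustrative only and not part of the imported patch; the 4/32/4 sizing and the trailing 0 (EAL not yet initialized) simply mirror the values app_init_core_map() passes later in this commit, and example_core_map_usage is a hypothetical caller.

/* Editorial usage sketch (not part of the upstream sources). */
#include "cpu_core_map.h"

static int
example_core_map_usage(void)
{
	/* Up to 4 sockets, 32 cores per socket, 4 hyper-threads per core;
	 * last argument 0 matches how app_init_core_map() calls this
	 * before the EAL is brought up. */
	struct cpu_core_map *map = cpu_core_map_init(4, 32, 4, 0);
	int lcore_id;

	if (map == NULL)
		return -1;

	cpu_core_map_print(map);

	/* Translate physical position (socket 0, core 1, thread 0) to an lcore. */
	lcore_id = cpu_core_map_get_lcore_id(map, 0, 1, 0);

	cpu_core_map_free(map);
	return lcore_id;
}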
diff --git a/examples/ip_pipeline/init.c b/examples/ip_pipeline/init.c
new file mode 100644
index 00000000..83422e88
--- /dev/null
+++ b/examples/ip_pipeline/init.c
@@ -0,0 +1,1637 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+
+#include "app.h"
+#include "pipeline.h"
+#include "pipeline_common_fe.h"
+#include "pipeline_master.h"
+#include "pipeline_passthrough.h"
+#include "pipeline_firewall.h"
+#include "pipeline_flow_classification.h"
+#include "pipeline_flow_actions.h"
+#include "pipeline_routing.h"
+#include "thread_fe.h"
+
+#define APP_NAME_SIZE 32
+
+static void
+app_init_core_map(struct app_params *app)
+{
+ APP_LOG(app, HIGH, "Initializing CPU core map ...");
+ app->core_map = cpu_core_map_init(4, 32, 4, 0);
+
+ if (app->core_map == NULL)
+ rte_panic("Cannot create CPU core map\n");
+
+ if (app->log_level >= APP_LOG_LEVEL_LOW)
+ cpu_core_map_print(app->core_map);
+}
+
+static void
+app_init_core_mask(struct app_params *app)
+{
+ uint64_t mask = 0;
+ uint32_t i;
+
+ for (i = 0; i < app->n_pipelines; i++) {
+ struct app_pipeline_params *p = &app->pipeline_params[i];
+ int lcore_id;
+
+ lcore_id = cpu_core_map_get_lcore_id(app->core_map,
+ p->socket_id,
+ p->core_id,
+ p->hyper_th_id);
+
+ if (lcore_id < 0)
+ rte_panic("Cannot create CPU core mask\n");
+
+ mask |= 1LLU << lcore_id;
+ }
+
+ app->core_mask = mask;
+ APP_LOG(app, HIGH, "CPU core mask = 0x%016" PRIx64, app->core_mask);
+}
+
+static void
+app_init_eal(struct app_params *app)
+{
+ char buffer[256];
+ struct app_eal_params *p = &app->eal_params;
+ uint32_t n_args = 0;
+ uint32_t i;
+ int status;
+
+ app->eal_argv[n_args++] = strdup(app->app_name);
+
+ snprintf(buffer, sizeof(buffer), "-c%" PRIx64, app->core_mask);
+ app->eal_argv[n_args++] = strdup(buffer);
+
+ if (p->coremap) {
+ snprintf(buffer, sizeof(buffer), "--lcores=%s", p->coremap);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->master_lcore_present) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--master-lcore=%" PRIu32,
+ p->master_lcore);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ snprintf(buffer, sizeof(buffer), "-n%" PRIu32, p->channels);
+ app->eal_argv[n_args++] = strdup(buffer);
+
+ if (p->memory_present) {
+ snprintf(buffer, sizeof(buffer), "-m%" PRIu32, p->memory);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->ranks_present) {
+ snprintf(buffer, sizeof(buffer), "-r%" PRIu32, p->ranks);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_blacklist[i] == NULL)
+ break;
+
+ snprintf(buffer,
+ sizeof(buffer),
+ "--pci-blacklist=%s",
+ p->pci_blacklist[i]);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (app->port_mask != 0) {
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->pci_whitelist[i] == NULL)
+ break;
+
+ snprintf(buffer,
+ sizeof(buffer),
+ "--pci-whitelist=%s",
+ p->pci_whitelist[i]);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+ } else {
+ for (i = 0; i < app->n_links; i++) {
+ char *pci_bdf = app->link_params[i].pci_bdf;
+
+ snprintf(buffer,
+ sizeof(buffer),
+ "--pci-whitelist=%s",
+ pci_bdf);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+ }
+
+ for (i = 0; i < APP_MAX_LINKS; i++) {
+ if (p->vdev[i] == NULL)
+ break;
+
+ snprintf(buffer,
+ sizeof(buffer),
+ "--vdev=%s",
+ p->vdev[i]);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->vmware_tsc_map_present) && p->vmware_tsc_map) {
+ snprintf(buffer, sizeof(buffer), "--vmware-tsc-map");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->proc_type) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--proc-type=%s",
+ p->proc_type);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->syslog) {
+ snprintf(buffer, sizeof(buffer), "--syslog=%s", p->syslog);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->log_level_present) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--log-level=%" PRIu32,
+ p->log_level);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->version_present) && p->version) {
+ snprintf(buffer, sizeof(buffer), "-v");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->help_present) && p->help) {
+ snprintf(buffer, sizeof(buffer), "--help");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->no_huge_present) && p->no_huge) {
+ snprintf(buffer, sizeof(buffer), "--no-huge");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->no_pci_present) && p->no_pci) {
+ snprintf(buffer, sizeof(buffer), "--no-pci");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->no_hpet_present) && p->no_hpet) {
+ snprintf(buffer, sizeof(buffer), "--no-hpet");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->no_shconf_present) && p->no_shconf) {
+ snprintf(buffer, sizeof(buffer), "--no-shconf");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->add_driver) {
+ snprintf(buffer, sizeof(buffer), "-d=%s", p->add_driver);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->socket_mem) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--socket-mem=%s",
+ p->socket_mem);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->huge_dir) {
+ snprintf(buffer, sizeof(buffer), "--huge-dir=%s", p->huge_dir);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->file_prefix) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--file-prefix=%s",
+ p->file_prefix);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->base_virtaddr) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--base-virtaddr=%s",
+ p->base_virtaddr);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->create_uio_dev_present) && p->create_uio_dev) {
+ snprintf(buffer, sizeof(buffer), "--create-uio-dev");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if (p->vfio_intr) {
+ snprintf(buffer,
+ sizeof(buffer),
+ "--vfio-intr=%s",
+ p->vfio_intr);
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ if ((p->xen_dom0_present) && (p->xen_dom0)) {
+ snprintf(buffer, sizeof(buffer), "--xen-dom0");
+ app->eal_argv[n_args++] = strdup(buffer);
+ }
+
+ snprintf(buffer, sizeof(buffer), "--");
+ app->eal_argv[n_args++] = strdup(buffer);
+
+ app->eal_argc = n_args;
+
+ APP_LOG(app, HIGH, "Initializing EAL ...");
+ if (app->log_level >= APP_LOG_LEVEL_LOW) {
+ int i;
+
+ fprintf(stdout, "[APP] EAL arguments: \"");
+ for (i = 1; i < app->eal_argc; i++)
+ fprintf(stdout, "%s ", app->eal_argv[i]);
+ fprintf(stdout, "\"\n");
+ }
+
+ status = rte_eal_init(app->eal_argc, app->eal_argv);
+ if (status < 0)
+ rte_panic("EAL init error\n");
+}
+
+static void
+app_init_mempool(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_mempools; i++) {
+ struct app_mempool_params *p = &app->mempool_params[i];
+
+ APP_LOG(app, HIGH, "Initializing %s ...", p->name);
+ app->mempool[i] = rte_mempool_create(
+ p->name,
+ p->pool_size,
+ p->buffer_size,
+ p->cache_size,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ p->cpu_socket_id,
+ 0);
+
+ if (app->mempool[i] == NULL)
+ rte_panic("%s init error\n", p->name);
+ }
+}
+
+static inline int
+app_link_filter_arp_add(struct app_link_params *link)
+{
+ struct rte_eth_ethertype_filter filter = {
+ .ether_type = ETHER_TYPE_ARP,
+ .flags = 0,
+ .queue = link->arp_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(link->pmd_id,
+ RTE_ETH_FILTER_ETHERTYPE,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_tcp_syn_add(struct app_link_params *link)
+{
+ struct rte_eth_syn_filter filter = {
+ .hig_pri = 1,
+ .queue = link->tcp_syn_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(link->pmd_id,
+ RTE_ETH_FILTER_SYN,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_ip_add(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = 0,
+ .proto_mask = 0, /* Disable */
+ .tcp_flags = 0,
+ .priority = 1, /* Lowest */
+ .queue = l1->ip_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_ip_del(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = 0,
+ .proto_mask = 0, /* Disable */
+ .tcp_flags = 0,
+ .priority = 1, /* Lowest */
+ .queue = l1->ip_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_DELETE,
+ &filter);
+}
+
+static inline int
+app_link_filter_tcp_add(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_TCP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->tcp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_tcp_del(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_TCP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->tcp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_DELETE,
+ &filter);
+}
+
+static inline int
+app_link_filter_udp_add(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_UDP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->udp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_udp_del(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_UDP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->udp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_DELETE,
+ &filter);
+}
+
+static inline int
+app_link_filter_sctp_add(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_SCTP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->sctp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_ADD,
+ &filter);
+}
+
+static inline int
+app_link_filter_sctp_del(struct app_link_params *l1, struct app_link_params *l2)
+{
+ struct rte_eth_ntuple_filter filter = {
+ .flags = RTE_5TUPLE_FLAGS,
+ .dst_ip = rte_bswap32(l2->ip),
+ .dst_ip_mask = UINT32_MAX, /* Enable */
+ .src_ip = 0,
+ .src_ip_mask = 0, /* Disable */
+ .dst_port = 0,
+ .dst_port_mask = 0, /* Disable */
+ .src_port = 0,
+ .src_port_mask = 0, /* Disable */
+ .proto = IPPROTO_SCTP,
+ .proto_mask = UINT8_MAX, /* Enable */
+ .tcp_flags = 0,
+ .priority = 2, /* Higher priority than IP */
+ .queue = l1->sctp_local_q,
+ };
+
+ return rte_eth_dev_filter_ctrl(l1->pmd_id,
+ RTE_ETH_FILTER_NTUPLE,
+ RTE_ETH_FILTER_DELETE,
+ &filter);
+}
+
+static void
+app_link_set_arp_filter(struct app_params *app, struct app_link_params *cp)
+{
+ if (cp->arp_q != 0) {
+ int status = app_link_filter_arp_add(cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32 "): "
+ "Adding ARP filter (queue = %" PRIu32 ")",
+ cp->name, cp->pmd_id, cp->arp_q);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding ARP filter "
+ "(queue = %" PRIu32 ") (%" PRId32 ")\n",
+ cp->name, cp->pmd_id, cp->arp_q, status);
+ }
+}
+
+static void
+app_link_set_tcp_syn_filter(struct app_params *app, struct app_link_params *cp)
+{
+ if (cp->tcp_syn_q != 0) {
+ int status = app_link_filter_tcp_syn_add(cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32 "): "
+ "Adding TCP SYN filter (queue = %" PRIu32 ")",
+ cp->name, cp->pmd_id, cp->tcp_syn_q);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding TCP SYN filter "
+ "(queue = %" PRIu32 ") (%" PRId32 ")\n",
+ cp->name, cp->pmd_id, cp->tcp_syn_q,
+ status);
+ }
+}
+
+static int
+app_link_is_virtual(struct app_link_params *p)
+{
+ uint32_t pmd_id = p->pmd_id;
+ struct rte_eth_dev *dev = &rte_eth_devices[pmd_id];
+
+ if (dev->dev_type == RTE_ETH_DEV_VIRTUAL)
+ return 1;
+
+ return 0;
+}
+
+void
+app_link_up_internal(struct app_params *app, struct app_link_params *cp)
+{
+ uint32_t i;
+ int status;
+
+ if (app_link_is_virtual(cp)) {
+ cp->state = 1;
+ return;
+ }
+
+ /* For each link, add filters for IP of current link */
+ if (cp->ip != 0) {
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *p = &app->link_params[i];
+
+ /* IP */
+ if (p->ip_local_q != 0) {
+ int status = app_link_filter_ip_add(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32 "): "
+ "Adding IP filter (queue= %" PRIu32
+ ", IP = 0x%08" PRIx32 ")",
+ p->name, p->pmd_id, p->ip_local_q,
+ cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding IP "
+ "filter (queue= %" PRIu32 ", "
+ "IP = 0x%08" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id,
+ p->ip_local_q, cp->ip, status);
+ }
+
+ /* TCP */
+ if (p->tcp_local_q != 0) {
+ int status = app_link_filter_tcp_add(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32 "): "
+ "Adding TCP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%08" PRIx32 ")",
+ p->name, p->pmd_id, p->tcp_local_q,
+ cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding TCP "
+ "filter (queue = %" PRIu32 ", "
+ "IP = 0x%08" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id,
+ p->tcp_local_q, cp->ip, status);
+ }
+
+ /* UDP */
+ if (p->udp_local_q != 0) {
+ int status = app_link_filter_udp_add(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32 "): "
+ "Adding UDP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%08" PRIx32 ")",
+ p->name, p->pmd_id, p->udp_local_q,
+ cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding UDP "
+ "filter (queue = %" PRIu32 ", "
+ "IP = 0x%08" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id,
+ p->udp_local_q, cp->ip, status);
+ }
+
+ /* SCTP */
+ if (p->sctp_local_q != 0) {
+ int status = app_link_filter_sctp_add(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32
+ "): Adding SCTP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%08" PRIx32 ")",
+ p->name, p->pmd_id, p->sctp_local_q,
+ cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32 "): "
+ "Error adding SCTP "
+ "filter (queue = %" PRIu32 ", "
+ "IP = 0x%08" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id,
+ p->sctp_local_q, cp->ip,
+ status);
+ }
+ }
+ }
+
+ /* PMD link up */
+ status = rte_eth_dev_set_link_up(cp->pmd_id);
+ if (status < 0)
+ rte_panic("%s (%" PRIu32 "): PMD set link up error %"
+ PRId32 "\n", cp->name, cp->pmd_id, status);
+
+ /* Mark link as UP */
+ cp->state = 1;
+}
+
+void
+app_link_down_internal(struct app_params *app, struct app_link_params *cp)
+{
+ uint32_t i;
+ int status;
+
+ if (app_link_is_virtual(cp)) {
+ cp->state = 0;
+ return;
+ }
+
+ /* PMD link down */
+ status = rte_eth_dev_set_link_down(cp->pmd_id);
+ if (status < 0)
+ rte_panic("%s (%" PRIu32 "): PMD set link down error %"
+ PRId32 "\n", cp->name, cp->pmd_id, status);
+
+ /* Mark link as DOWN */
+ cp->state = 0;
+
+ /* Return if current link IP is not valid */
+ if (cp->ip == 0)
+ return;
+
+ /* For each link, remove filters for IP of current link */
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *p = &app->link_params[i];
+
+ /* IP */
+ if (p->ip_local_q != 0) {
+ int status = app_link_filter_ip_del(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32
+ "): Deleting IP filter "
+ "(queue = %" PRIu32 ", IP = 0x%" PRIx32 ")",
+ p->name, p->pmd_id, p->ip_local_q, cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32
+ "): Error deleting IP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id, p->ip_local_q,
+ cp->ip, status);
+ }
+
+ /* TCP */
+ if (p->tcp_local_q != 0) {
+ int status = app_link_filter_tcp_del(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32
+ "): Deleting TCP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32 ")",
+ p->name, p->pmd_id, p->tcp_local_q, cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32
+ "): Error deleting TCP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id, p->tcp_local_q,
+ cp->ip, status);
+ }
+
+ /* UDP */
+ if (p->udp_local_q != 0) {
+ int status = app_link_filter_udp_del(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32
+ "): Deleting UDP filter "
+ "(queue = %" PRIu32 ", IP = 0x%" PRIx32 ")",
+ p->name, p->pmd_id, p->udp_local_q, cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32
+ "): Error deleting UDP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id, p->udp_local_q,
+ cp->ip, status);
+ }
+
+ /* SCTP */
+ if (p->sctp_local_q != 0) {
+ int status = app_link_filter_sctp_del(p, cp);
+
+ APP_LOG(app, LOW, "%s (%" PRIu32
+ "): Deleting SCTP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32 ")",
+ p->name, p->pmd_id, p->sctp_local_q, cp->ip);
+
+ if (status)
+ rte_panic("%s (%" PRIu32
+ "): Error deleting SCTP filter "
+ "(queue = %" PRIu32
+ ", IP = 0x%" PRIx32
+ ") (%" PRId32 ")\n",
+ p->name, p->pmd_id, p->sctp_local_q,
+ cp->ip, status);
+ }
+ }
+}
+
+static void
+app_check_link(struct app_params *app)
+{
+ uint32_t all_links_up, i;
+
+ all_links_up = 1;
+
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *p = &app->link_params[i];
+ struct rte_eth_link link_params;
+
+ memset(&link_params, 0, sizeof(link_params));
+ rte_eth_link_get(p->pmd_id, &link_params);
+
+ APP_LOG(app, HIGH, "%s (%" PRIu32 ") (%" PRIu32 " Gbps) %s",
+ p->name,
+ p->pmd_id,
+ link_params.link_speed / 1000,
+ link_params.link_status ? "UP" : "DOWN");
+
+ if (link_params.link_status == ETH_LINK_DOWN)
+ all_links_up = 0;
+ }
+
+ if (all_links_up == 0)
+ rte_panic("Some links are DOWN\n");
+}
+
+static uint32_t
+is_any_swq_frag_or_ras(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_swq; i++) {
+ struct app_pktq_swq_params *p = &app->swq_params[i];
+
+ if ((p->ipv4_frag == 1) || (p->ipv6_frag == 1) ||
+ (p->ipv4_ras == 1) || (p->ipv6_ras == 1))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void
+app_init_link_frag_ras(struct app_params *app)
+{
+ uint32_t i;
+
+ if (is_any_swq_frag_or_ras(app)) {
+ for (i = 0; i < app->n_pktq_hwq_out; i++) {
+ struct app_pktq_hwq_out_params *p_txq = &app->hwq_out_params[i];
+
+ p_txq->conf.txq_flags &= ~ETH_TXQ_FLAGS_NOMULTSEGS;
+ }
+ }
+}
+
+static inline int
+app_get_cpu_socket_id(uint32_t pmd_id)
+{
+ int status = rte_eth_dev_socket_id(pmd_id);
+
+ return (status != SOCKET_ID_ANY) ? status : 0;
+}
+
+static void
+app_init_link(struct app_params *app)
+{
+ uint32_t i;
+
+ app_init_link_frag_ras(app);
+
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *p_link = &app->link_params[i];
+ uint32_t link_id, n_hwq_in, n_hwq_out, j;
+ int status;
+
+ sscanf(p_link->name, "LINK%" SCNu32, &link_id);
+ n_hwq_in = app_link_get_n_rxq(app, p_link);
+ n_hwq_out = app_link_get_n_txq(app, p_link);
+
+ APP_LOG(app, HIGH, "Initializing %s (%" PRIu32") "
+ "(%" PRIu32 " RXQ, %" PRIu32 " TXQ) ...",
+ p_link->name,
+ p_link->pmd_id,
+ n_hwq_in,
+ n_hwq_out);
+
+ /* LINK */
+ status = rte_eth_dev_configure(
+ p_link->pmd_id,
+ n_hwq_in,
+ n_hwq_out,
+ &p_link->conf);
+ if (status < 0)
+ rte_panic("%s (%" PRId32 "): "
+ "init error (%" PRId32 ")\n",
+ p_link->name, p_link->pmd_id, status);
+
+ rte_eth_macaddr_get(p_link->pmd_id,
+ (struct ether_addr *) &p_link->mac_addr);
+
+ if (p_link->promisc)
+ rte_eth_promiscuous_enable(p_link->pmd_id);
+
+ /* RXQ */
+ for (j = 0; j < app->n_pktq_hwq_in; j++) {
+ struct app_pktq_hwq_in_params *p_rxq =
+ &app->hwq_in_params[j];
+ uint32_t rxq_link_id, rxq_queue_id;
+
+ sscanf(p_rxq->name, "RXQ%" SCNu32 ".%" SCNu32,
+ &rxq_link_id, &rxq_queue_id);
+ if (rxq_link_id != link_id)
+ continue;
+
+ status = rte_eth_rx_queue_setup(
+ p_link->pmd_id,
+ rxq_queue_id,
+ p_rxq->size,
+ app_get_cpu_socket_id(p_link->pmd_id),
+ &p_rxq->conf,
+ app->mempool[p_rxq->mempool_id]);
+ if (status < 0)
+ rte_panic("%s (%" PRIu32 "): "
+ "%s init error (%" PRId32 ")\n",
+ p_link->name,
+ p_link->pmd_id,
+ p_rxq->name,
+ status);
+ }
+
+ /* TXQ */
+ for (j = 0; j < app->n_pktq_hwq_out; j++) {
+ struct app_pktq_hwq_out_params *p_txq =
+ &app->hwq_out_params[j];
+ uint32_t txq_link_id, txq_queue_id;
+
+ sscanf(p_txq->name, "TXQ%" SCNu32 ".%" SCNu32,
+ &txq_link_id, &txq_queue_id);
+ if (txq_link_id != link_id)
+ continue;
+
+ status = rte_eth_tx_queue_setup(
+ p_link->pmd_id,
+ txq_queue_id,
+ p_txq->size,
+ app_get_cpu_socket_id(p_link->pmd_id),
+ &p_txq->conf);
+ if (status < 0)
+ rte_panic("%s (%" PRIu32 "): "
+ "%s init error (%" PRId32 ")\n",
+ p_link->name,
+ p_link->pmd_id,
+ p_txq->name,
+ status);
+ }
+
+ /* LINK START */
+ status = rte_eth_dev_start(p_link->pmd_id);
+ if (status < 0)
+ rte_panic("Cannot start %s (error %" PRId32 ")\n",
+ p_link->name, status);
+
+ /* LINK UP */
+ app_link_set_arp_filter(app, p_link);
+ app_link_set_tcp_syn_filter(app, p_link);
+ app_link_up_internal(app, p_link);
+ }
+
+ app_check_link(app);
+}
+
+static void
+app_init_swq(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_swq; i++) {
+ struct app_pktq_swq_params *p = &app->swq_params[i];
+ unsigned flags = 0;
+
+ if (app_swq_get_readers(app, p) == 1)
+ flags |= RING_F_SC_DEQ;
+ if (app_swq_get_writers(app, p) == 1)
+ flags |= RING_F_SP_ENQ;
+
+ APP_LOG(app, HIGH, "Initializing %s...", p->name);
+ app->swq[i] = rte_ring_create(
+ p->name,
+ p->size,
+ p->cpu_socket_id,
+ flags);
+
+ if (app->swq[i] == NULL)
+ rte_panic("%s init error\n", p->name);
+ }
+}
+
+static void
+app_init_tm(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pktq_tm; i++) {
+ struct app_pktq_tm_params *p_tm = &app->tm_params[i];
+ struct app_link_params *p_link;
+ struct rte_eth_link link_eth_params;
+ struct rte_sched_port *sched;
+ uint32_t n_subports, subport_id;
+ int status;
+
+ p_link = app_get_link_for_tm(app, p_tm);
+ /* LINK */
+ rte_eth_link_get(p_link->pmd_id, &link_eth_params);
+
+ /* TM */
+ p_tm->sched_port_params.name = p_tm->name;
+ p_tm->sched_port_params.socket =
+ app_get_cpu_socket_id(p_link->pmd_id);
+ p_tm->sched_port_params.rate =
+ (uint64_t) link_eth_params.link_speed * 1000 * 1000 / 8;
+
+ APP_LOG(app, HIGH, "Initializing %s ...", p_tm->name);
+ sched = rte_sched_port_config(&p_tm->sched_port_params);
+ if (sched == NULL)
+ rte_panic("%s init error\n", p_tm->name);
+ app->tm[i] = sched;
+
+ /* Subport */
+ n_subports = p_tm->sched_port_params.n_subports_per_port;
+ for (subport_id = 0; subport_id < n_subports; subport_id++) {
+ uint32_t n_pipes_per_subport, pipe_id;
+
+ status = rte_sched_subport_config(sched,
+ subport_id,
+ &p_tm->sched_subport_params[subport_id]);
+ if (status)
+ rte_panic("%s subport %" PRIu32
+ " init error (%" PRId32 ")\n",
+ p_tm->name, subport_id, status);
+
+ /* Pipe */
+ n_pipes_per_subport =
+ p_tm->sched_port_params.n_pipes_per_subport;
+ for (pipe_id = 0;
+ pipe_id < n_pipes_per_subport;
+ pipe_id++) {
+ int profile_id = p_tm->sched_pipe_to_profile[
+ subport_id * APP_MAX_SCHED_PIPES +
+ pipe_id];
+
+ if (profile_id == -1)
+ continue;
+
+ status = rte_sched_pipe_config(sched,
+ subport_id,
+ pipe_id,
+ profile_id);
+ if (status)
+ rte_panic("%s subport %" PRIu32
+ " pipe %" PRIu32
+ " (profile %" PRId32 ") "
+ "init error (% " PRId32 ")\n",
+ p_tm->name, subport_id, pipe_id,
+ profile_id, status);
+ }
+ }
+ }
+}
+
+static void
+app_init_msgq(struct app_params *app)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_msgq; i++) {
+ struct app_msgq_params *p = &app->msgq_params[i];
+
+ APP_LOG(app, HIGH, "Initializing %s ...", p->name);
+ app->msgq[i] = rte_ring_create(
+ p->name,
+ p->size,
+ p->cpu_socket_id,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+
+ if (app->msgq[i] == NULL)
+ rte_panic("%s init error\n", p->name);
+ }
+}
+
+static void
+app_pipeline_params_get(struct app_params *app,
+ struct app_pipeline_params *p_in,
+ struct pipeline_params *p_out)
+{
+ uint32_t i;
+ uint32_t mempool_id;
+
+ snprintf(p_out->name, PIPELINE_NAME_SIZE, "%s", p_in->name);
+
+ p_out->socket_id = (int) p_in->socket_id;
+
+ p_out->log_level = app->log_level;
+
+ /* pktq_in */
+ p_out->n_ports_in = p_in->n_pktq_in;
+ for (i = 0; i < p_in->n_pktq_in; i++) {
+ struct app_pktq_in_params *in = &p_in->pktq_in[i];
+ struct pipeline_port_in_params *out = &p_out->port_in[i];
+
+ switch (in->type) {
+ case APP_PKTQ_IN_HWQ:
+ {
+ struct app_pktq_hwq_in_params *p_hwq_in =
+ &app->hwq_in_params[in->id];
+ struct app_link_params *p_link =
+ app_get_link_for_rxq(app, p_hwq_in);
+ uint32_t rxq_link_id, rxq_queue_id;
+
+ sscanf(p_hwq_in->name, "RXQ%" SCNu32 ".%" SCNu32,
+ &rxq_link_id,
+ &rxq_queue_id);
+
+ out->type = PIPELINE_PORT_IN_ETHDEV_READER;
+ out->params.ethdev.port_id = p_link->pmd_id;
+ out->params.ethdev.queue_id = rxq_queue_id;
+ out->burst_size = p_hwq_in->burst;
+ break;
+ }
+ case APP_PKTQ_IN_SWQ:
+ {
+ struct app_pktq_swq_params *swq_params = &app->swq_params[in->id];
+
+ if ((swq_params->ipv4_frag == 0) && (swq_params->ipv6_frag == 0)) {
+ if (app_swq_get_readers(app, swq_params) == 1) {
+ out->type = PIPELINE_PORT_IN_RING_READER;
+ out->params.ring.ring = app->swq[in->id];
+ out->burst_size = app->swq_params[in->id].burst_read;
+ } else {
+ out->type = PIPELINE_PORT_IN_RING_MULTI_READER;
+ out->params.ring_multi.ring = app->swq[in->id];
+ out->burst_size = swq_params->burst_read;
+ }
+ } else {
+ if (swq_params->ipv4_frag == 1) {
+ struct rte_port_ring_reader_ipv4_frag_params *params =
+ &out->params.ring_ipv4_frag;
+
+ out->type = PIPELINE_PORT_IN_RING_READER_IPV4_FRAG;
+ params->ring = app->swq[in->id];
+ params->mtu = swq_params->mtu;
+ params->metadata_size = swq_params->metadata_size;
+ params->pool_direct =
+ app->mempool[swq_params->mempool_direct_id];
+ params->pool_indirect =
+ app->mempool[swq_params->mempool_indirect_id];
+ out->burst_size = swq_params->burst_read;
+ } else {
+ struct rte_port_ring_reader_ipv6_frag_params *params =
+ &out->params.ring_ipv6_frag;
+
+ out->type = PIPELINE_PORT_IN_RING_READER_IPV6_FRAG;
+ params->ring = app->swq[in->id];
+ params->mtu = swq_params->mtu;
+ params->metadata_size = swq_params->metadata_size;
+ params->pool_direct =
+ app->mempool[swq_params->mempool_direct_id];
+ params->pool_indirect =
+ app->mempool[swq_params->mempool_indirect_id];
+ out->burst_size = swq_params->burst_read;
+ }
+ }
+ break;
+ }
+ case APP_PKTQ_IN_TM:
+ out->type = PIPELINE_PORT_IN_SCHED_READER;
+ out->params.sched.sched = app->tm[in->id];
+ out->burst_size = app->tm_params[in->id].burst_read;
+ break;
+ case APP_PKTQ_IN_SOURCE:
+ mempool_id = app->source_params[in->id].mempool_id;
+ out->type = PIPELINE_PORT_IN_SOURCE;
+ out->params.source.mempool = app->mempool[mempool_id];
+ out->burst_size = app->source_params[in->id].burst;
+
+#ifdef RTE_NEXT_ABI
+ if (app->source_params[in->id].file_name
+ != NULL) {
+ out->params.source.file_name = strdup(
+ app->source_params[in->id].
+ file_name);
+ if (out->params.source.file_name == NULL) {
+ out->params.source.
+ n_bytes_per_pkt = 0;
+ break;
+ }
+ out->params.source.n_bytes_per_pkt =
+ app->source_params[in->id].
+ n_bytes_per_pkt;
+ }
+#endif
+
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* pktq_out */
+ p_out->n_ports_out = p_in->n_pktq_out;
+ for (i = 0; i < p_in->n_pktq_out; i++) {
+ struct app_pktq_out_params *in = &p_in->pktq_out[i];
+ struct pipeline_port_out_params *out = &p_out->port_out[i];
+
+ switch (in->type) {
+ case APP_PKTQ_OUT_HWQ:
+ {
+ struct app_pktq_hwq_out_params *p_hwq_out =
+ &app->hwq_out_params[in->id];
+ struct app_link_params *p_link =
+ app_get_link_for_txq(app, p_hwq_out);
+ uint32_t txq_link_id, txq_queue_id;
+
+ sscanf(p_hwq_out->name,
+ "TXQ%" SCNu32 ".%" SCNu32,
+ &txq_link_id,
+ &txq_queue_id);
+
+ if (p_hwq_out->dropless == 0) {
+ struct rte_port_ethdev_writer_params *params =
+ &out->params.ethdev;
+
+ out->type = PIPELINE_PORT_OUT_ETHDEV_WRITER;
+ params->port_id = p_link->pmd_id;
+ params->queue_id = txq_queue_id;
+ params->tx_burst_sz =
+ app->hwq_out_params[in->id].burst;
+ } else {
+ struct rte_port_ethdev_writer_nodrop_params
+ *params = &out->params.ethdev_nodrop;
+
+ out->type =
+ PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP;
+ params->port_id = p_link->pmd_id;
+ params->queue_id = txq_queue_id;
+ params->tx_burst_sz = p_hwq_out->burst;
+ params->n_retries = p_hwq_out->n_retries;
+ }
+ break;
+ }
+ case APP_PKTQ_OUT_SWQ:
+ {
+ struct app_pktq_swq_params *swq_params = &app->swq_params[in->id];
+
+ if ((swq_params->ipv4_ras == 0) && (swq_params->ipv6_ras == 0)) {
+ if (app_swq_get_writers(app, swq_params) == 1) {
+ if (app->swq_params[in->id].dropless == 0) {
+ struct rte_port_ring_writer_params *params =
+ &out->params.ring;
+
+ out->type = PIPELINE_PORT_OUT_RING_WRITER;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz =
+ app->swq_params[in->id].burst_write;
+ } else {
+ struct rte_port_ring_writer_nodrop_params
+ *params = &out->params.ring_nodrop;
+
+ out->type =
+ PIPELINE_PORT_OUT_RING_WRITER_NODROP;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz =
+ app->swq_params[in->id].burst_write;
+ params->n_retries =
+ app->swq_params[in->id].n_retries;
+ }
+ } else {
+ if (swq_params->dropless == 0) {
+ struct rte_port_ring_multi_writer_params *params =
+ &out->params.ring_multi;
+
+ out->type = PIPELINE_PORT_OUT_RING_MULTI_WRITER;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz = swq_params->burst_write;
+ } else {
+ struct rte_port_ring_multi_writer_nodrop_params
+ *params = &out->params.ring_multi_nodrop;
+
+ out->type = PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz = swq_params->burst_write;
+ params->n_retries = swq_params->n_retries;
+ }
+ }
+ } else {
+ if (swq_params->ipv4_ras == 1) {
+ struct rte_port_ring_writer_ipv4_ras_params *params =
+ &out->params.ring_ipv4_ras;
+
+ out->type = PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz = swq_params->burst_write;
+ } else {
+ struct rte_port_ring_writer_ipv6_ras_params *params =
+ &out->params.ring_ipv6_ras;
+
+ out->type = PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS;
+ params->ring = app->swq[in->id];
+ params->tx_burst_sz = swq_params->burst_write;
+ }
+ }
+ break;
+ }
+ case APP_PKTQ_OUT_TM: {
+ struct rte_port_sched_writer_params *params =
+ &out->params.sched;
+
+ out->type = PIPELINE_PORT_OUT_SCHED_WRITER;
+ params->sched = app->tm[in->id];
+ params->tx_burst_sz =
+ app->tm_params[in->id].burst_write;
+ break;
+ }
+ case APP_PKTQ_OUT_SINK:
+ out->type = PIPELINE_PORT_OUT_SINK;
+ if (app->sink_params[in->id].file_name != NULL) {
+ out->params.sink.file_name = strdup(
+ app->sink_params[in->id].
+ file_name);
+ if (out->params.sink.file_name == NULL) {
+ out->params.sink.max_n_pkts = 0;
+ break;
+ }
+ out->params.sink.max_n_pkts =
+ app->sink_params[in->id].
+ n_pkts_to_dump;
+ } else {
+ out->params.sink.file_name = NULL;
+ out->params.sink.max_n_pkts = 0;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ /* msgq */
+ p_out->n_msgq = p_in->n_msgq_in;
+
+ for (i = 0; i < p_in->n_msgq_in; i++)
+ p_out->msgq_in[i] = app->msgq[p_in->msgq_in[i]];
+
+ for (i = 0; i < p_in->n_msgq_out; i++)
+ p_out->msgq_out[i] = app->msgq[p_in->msgq_out[i]];
+
+ /* args */
+ p_out->n_args = p_in->n_args;
+ for (i = 0; i < p_in->n_args; i++) {
+ p_out->args_name[i] = p_in->args_name[i];
+ p_out->args_value[i] = p_in->args_value[i];
+ }
+}
+
+static void
+app_init_pipelines(struct app_params *app)
+{
+ uint32_t p_id;
+
+ for (p_id = 0; p_id < app->n_pipelines; p_id++) {
+ struct app_pipeline_params *params =
+ &app->pipeline_params[p_id];
+ struct app_pipeline_data *data = &app->pipeline_data[p_id];
+ struct pipeline_type *ptype;
+ struct pipeline_params pp;
+
+ APP_LOG(app, HIGH, "Initializing %s ...", params->name);
+
+ ptype = app_pipeline_type_find(app, params->type);
+ if (ptype == NULL)
+ rte_panic("Init error: Unknown pipeline type \"%s\"\n",
+ params->type);
+
+ app_pipeline_params_get(app, params, &pp);
+
+ /* Back-end */
+ data->be = NULL;
+ if (ptype->be_ops->f_init) {
+ data->be = ptype->be_ops->f_init(&pp, (void *) app);
+
+ if (data->be == NULL)
+ rte_panic("Pipeline instance \"%s\" back-end "
+ "init error\n", params->name);
+ }
+
+ /* Front-end */
+ data->fe = NULL;
+ if (ptype->fe_ops->f_init) {
+ data->fe = ptype->fe_ops->f_init(&pp, (void *) app);
+
+ if (data->fe == NULL)
+ rte_panic("Pipeline instance \"%s\" front-end "
+ "init error\n", params->name);
+ }
+
+ data->ptype = ptype;
+
+ data->timer_period = (rte_get_tsc_hz() *
+ params->timer_period) / 100;
+ }
+}
+
+static void
+app_init_threads(struct app_params *app)
+{
+ uint64_t time = rte_get_tsc_cycles();
+ uint32_t p_id;
+
+ for (p_id = 0; p_id < app->n_pipelines; p_id++) {
+ struct app_pipeline_params *params =
+ &app->pipeline_params[p_id];
+ struct app_pipeline_data *data = &app->pipeline_data[p_id];
+ struct pipeline_type *ptype;
+ struct app_thread_data *t;
+ struct app_thread_pipeline_data *p;
+ int lcore_id;
+
+ lcore_id = cpu_core_map_get_lcore_id(app->core_map,
+ params->socket_id,
+ params->core_id,
+ params->hyper_th_id);
+
+ if (lcore_id < 0)
+ rte_panic("Invalid core s%" PRIu32 "c%" PRIu32 "%s\n",
+ params->socket_id,
+ params->core_id,
+ (params->hyper_th_id) ? "h" : "");
+
+ t = &app->thread_data[lcore_id];
+
+ t->timer_period = (rte_get_tsc_hz() * APP_THREAD_TIMER_PERIOD) / 1000;
+ t->thread_req_deadline = time + t->timer_period;
+
+ t->headroom_cycles = 0;
+ t->headroom_time = rte_get_tsc_cycles();
+ t->headroom_ratio = 0.0;
+
+ t->msgq_in = app_thread_msgq_in_get(app,
+ params->socket_id,
+ params->core_id,
+ params->hyper_th_id);
+ if (t->msgq_in == NULL)
+ rte_panic("Init error: Cannot find MSGQ_IN for thread %" PRId32,
+ lcore_id);
+
+ t->msgq_out = app_thread_msgq_out_get(app,
+ params->socket_id,
+ params->core_id,
+ params->hyper_th_id);
+ if (t->msgq_out == NULL)
+ rte_panic("Init error: Cannot find MSGQ_OUT for thread %" PRId32,
+ lcore_id);
+
+ ptype = app_pipeline_type_find(app, params->type);
+ if (ptype == NULL)
+ rte_panic("Init error: Unknown pipeline "
+ "type \"%s\"\n", params->type);
+
+ p = (ptype->be_ops->f_run == NULL) ?
+ &t->regular[t->n_regular] :
+ &t->custom[t->n_custom];
+
+ p->pipeline_id = p_id;
+ p->be = data->be;
+ p->f_run = ptype->be_ops->f_run;
+ p->f_timer = ptype->be_ops->f_timer;
+ p->timer_period = data->timer_period;
+ p->deadline = time + data->timer_period;
+
+ data->enabled = 1;
+
+ if (ptype->be_ops->f_run == NULL)
+ t->n_regular++;
+ else
+ t->n_custom++;
+ }
+}
+
+int app_init(struct app_params *app)
+{
+ app_init_core_map(app);
+ app_init_core_mask(app);
+
+ app_init_eal(app);
+ app_init_mempool(app);
+ app_init_link(app);
+ app_init_swq(app);
+ app_init_tm(app);
+ app_init_msgq(app);
+
+ app_pipeline_common_cmd_push(app);
+ app_pipeline_thread_cmd_push(app);
+ app_pipeline_type_register(app, &pipeline_master);
+ app_pipeline_type_register(app, &pipeline_passthrough);
+ app_pipeline_type_register(app, &pipeline_flow_classification);
+ app_pipeline_type_register(app, &pipeline_flow_actions);
+ app_pipeline_type_register(app, &pipeline_firewall);
+ app_pipeline_type_register(app, &pipeline_routing);
+
+ app_init_pipelines(app);
+ app_init_threads(app);
+
+ return 0;
+}
+
+static int
+app_pipeline_type_cmd_push(struct app_params *app,
+ struct pipeline_type *ptype)
+{
+ cmdline_parse_ctx_t *cmds;
+ uint32_t n_cmds, i;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (ptype == NULL))
+ return -EINVAL;
+
+ n_cmds = pipeline_type_cmds_count(ptype);
+ if (n_cmds == 0)
+ return 0;
+
+ cmds = ptype->fe_ops->cmds;
+
+ /* Check for available slots in the application commands array */
+ if (n_cmds > APP_MAX_CMDS - app->n_cmds)
+ return -ENOMEM;
+
+ /* Push pipeline commands into the application */
+ memcpy(&app->cmds[app->n_cmds],
+ cmds,
+ n_cmds * sizeof(cmdline_parse_ctx_t));
+
+ for (i = 0; i < n_cmds; i++)
+ app->cmds[app->n_cmds + i]->data = app;
+
+ app->n_cmds += n_cmds;
+ app->cmds[app->n_cmds] = NULL;
+
+ return 0;
+}
+
+int
+app_pipeline_type_register(struct app_params *app, struct pipeline_type *ptype)
+{
+ uint32_t n_cmds, i;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (ptype == NULL) ||
+ (ptype->name == NULL) ||
+ (strlen(ptype->name) == 0) ||
+ (ptype->be_ops->f_init == NULL) ||
+ (ptype->be_ops->f_timer == NULL))
+ return -EINVAL;
+
+ /* Check for duplicate entry */
+ for (i = 0; i < app->n_pipeline_types; i++)
+ if (strcmp(app->pipeline_type[i].name, ptype->name) == 0)
+ return -EEXIST;
+
+ /* Check for resource availability */
+ n_cmds = pipeline_type_cmds_count(ptype);
+ if ((app->n_pipeline_types == APP_MAX_PIPELINE_TYPES) ||
+ (n_cmds > APP_MAX_CMDS - app->n_cmds))
+ return -ENOMEM;
+
+ /* Copy pipeline type */
+ memcpy(&app->pipeline_type[app->n_pipeline_types++],
+ ptype,
+ sizeof(struct pipeline_type));
+
+ /* Copy CLI commands */
+ if (n_cmds)
+ app_pipeline_type_cmd_push(app, ptype);
+
+ return 0;
+}
+
+struct pipeline_type *
+app_pipeline_type_find(struct app_params *app, char *name)
+{
+ uint32_t i;
+
+ for (i = 0; i < app->n_pipeline_types; i++)
+ if (strcmp(app->pipeline_type[i].name, name) == 0)
+ return &app->pipeline_type[i];
+
+ return NULL;
+}
diff --git a/examples/ip_pipeline/main.c b/examples/ip_pipeline/main.c
new file mode 100644
index 00000000..4944dcfb
--- /dev/null
+++ b/examples/ip_pipeline/main.c
@@ -0,0 +1,64 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "app.h"
+
+static struct app_params app;
+
+int
+main(int argc, char **argv)
+{
+ rte_openlog_stream(stderr);
+
+ /* Config */
+ app_config_init(&app);
+
+ app_config_args(&app, argc, argv);
+
+ app_config_preproc(&app);
+
+ app_config_parse(&app, app.parser_file);
+
+ app_config_check(&app);
+
+ /* Init */
+ app_init(&app);
+
+ /* Run-time */
+ rte_eal_mp_remote_launch(
+ app_thread,
+ (void *) &app,
+ CALL_MASTER);
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/parser.h b/examples/ip_pipeline/parser.h
new file mode 100644
index 00000000..58b59daf
--- /dev/null
+++ b/examples/ip_pipeline/parser.h
@@ -0,0 +1,50 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PARSER_H__
+#define __INCLUDE_PARSER_H__
+
+int
+parser_read_arg_bool(const char *p);
+
+int
+parser_read_uint64(uint64_t *value, const char *p);
+
+int
+parser_read_uint32(uint32_t *value, const char *p);
+
+int
+parse_hex_string(char *src, uint8_t *dst, uint32_t *size);
+
+#endif
+
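
Editorial note: these prototypes are the whole contract the configuration code relies on. A hypothetical caller, assuming the 0-on-success / negative-on-error convention the ip_pipeline config parser uses for these helpers (their definitions live in config_parse.c, outside this hunk):

/* Hypothetical caller; example_parse_burst is not part of the patch. */
#include <stdint.h>
#include <stdio.h>

#include "parser.h"

static int
example_parse_burst(const char *token, uint32_t *burst)
{
	int status = parser_read_uint32(burst, token);

	if (status != 0) {
		fprintf(stderr, "not an unsigned 32-bit value: \"%s\"\n", token);
		return status;
	}
	return 0;
}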
diff --git a/examples/ip_pipeline/pipeline.h b/examples/ip_pipeline/pipeline.h
new file mode 100644
index 00000000..dab9c36d
--- /dev/null
+++ b/examples/ip_pipeline/pipeline.h
@@ -0,0 +1,93 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_H__
+#define __INCLUDE_PIPELINE_H__
+
+#include <cmdline_parse.h>
+
+#include "pipeline_be.h"
+
+/*
+ * Pipeline type front-end operations
+ */
+
+typedef void* (*pipeline_fe_op_init)(struct pipeline_params *params, void *arg);
+
+typedef int (*pipeline_fe_op_free)(void *pipeline);
+
+struct pipeline_fe_ops {
+ pipeline_fe_op_init f_init;
+ pipeline_fe_op_free f_free;
+ cmdline_parse_ctx_t *cmds;
+};
+
+/*
+ * Pipeline type
+ */
+
+struct pipeline_type {
+ const char *name;
+
+ /* pipeline back-end */
+ struct pipeline_be_ops *be_ops;
+
+ /* pipeline front-end */
+ struct pipeline_fe_ops *fe_ops;
+};
+
+static inline uint32_t
+pipeline_type_cmds_count(struct pipeline_type *ptype)
+{
+ cmdline_parse_ctx_t *cmds;
+ uint32_t n_cmds;
+
+ if (ptype->fe_ops == NULL)
+ return 0;
+
+ cmds = ptype->fe_ops->cmds;
+ if (cmds == NULL)
+ return 0;
+
+ for (n_cmds = 0; cmds[n_cmds]; n_cmds++);
+
+ return n_cmds;
+}
+
+int
+parse_pipeline_core(uint32_t *socket,
+ uint32_t *core,
+ uint32_t *ht,
+ const char *entry);
+
+#endif
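
Editorial note: a pipeline type is a name plus a back-end ops table (declared in pipeline_be.h, outside this hunk) and an optional front-end ops table with its CLI commands. An illustrative sketch of declaring one, using hypothetical ops symbols and a made-up type name:

/* Illustrative only: my_pipeline_be_ops / my_pipeline_fe_ops stand in
 * for real ops tables; "EXAMPLE" is a made-up type name. */
#include "pipeline.h"

extern struct pipeline_be_ops my_pipeline_be_ops;
extern struct pipeline_fe_ops my_pipeline_fe_ops;

static struct pipeline_type pipeline_example = {
	.name = "EXAMPLE",
	.be_ops = &my_pipeline_be_ops,
	.fe_ops = &my_pipeline_fe_ops,
};

At init time the application would call app_pipeline_type_register(app, &pipeline_example), exactly as init.c above does for the built-in master, passthrough, firewall, flow and routing types; the register call copies the type and pushes its front-end CLI commands into the shared command array.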
diff --git a/examples/ip_pipeline/pipeline/hash_func.h b/examples/ip_pipeline/pipeline/hash_func.h
new file mode 100644
index 00000000..9db7173f
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/hash_func.h
@@ -0,0 +1,351 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __INCLUDE_HASH_FUNC_H__
+#define __INCLUDE_HASH_FUNC_H__
+
+static inline uint64_t
+hash_xor_key8(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0;
+
+ xor0 = seed ^ k[0];
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key16(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key24(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+
+ xor0 ^= k[2];
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key32(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0, xor1;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+ xor1 = k[2] ^ k[3];
+
+ xor0 ^= xor1;
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key40(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0, xor1;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+ xor1 = k[2] ^ k[3];
+
+ xor0 ^= xor1;
+
+ xor0 ^= k[4];
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key48(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0, xor1, xor2;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+ xor1 = k[2] ^ k[3];
+ xor2 = k[4] ^ k[5];
+
+ xor0 ^= xor1;
+
+ xor0 ^= xor2;
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key56(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0, xor1, xor2;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+ xor1 = k[2] ^ k[3];
+ xor2 = k[4] ^ k[5];
+
+ xor0 ^= xor1;
+ xor2 ^= k[6];
+
+ xor0 ^= xor2;
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+static inline uint64_t
+hash_xor_key64(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t xor0, xor1, xor2, xor3;
+
+ xor0 = (k[0] ^ seed) ^ k[1];
+ xor1 = k[2] ^ k[3];
+ xor2 = k[4] ^ k[5];
+ xor3 = k[6] ^ k[7];
+
+ xor0 ^= xor1;
+ xor2 ^= xor3;
+
+ xor0 ^= xor2;
+
+ return (xor0 >> 32) ^ xor0;
+}
+
+#if defined(RTE_ARCH_X86_64) && defined(RTE_MACHINE_CPUFLAG_SSE4_2)
+
+#include <x86intrin.h>
+
+static inline uint64_t
+hash_crc_key8(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t crc0;
+
+ crc0 = _mm_crc32_u64(seed, k[0]);
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key16(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, crc0, crc1;
+
+ k0 = k[0];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key24(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, crc0, crc1;
+
+ k0 = k[0];
+ k2 = k[2];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc0 = _mm_crc32_u64(crc0, k2);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key32(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, crc0, crc1, crc2, crc3;
+
+ k0 = k[0];
+ k2 = k[2];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc2 = _mm_crc32_u64(k2, k[3]);
+ crc3 = k2 >> 32;
+
+ crc0 = _mm_crc32_u64(crc0, crc1);
+ crc1 = _mm_crc32_u64(crc2, crc3);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key40(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, crc0, crc1, crc2, crc3;
+
+ k0 = k[0];
+ k2 = k[2];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc2 = _mm_crc32_u64(k2, k[3]);
+ crc3 = _mm_crc32_u64(k2 >> 32, k[4]);
+
+ crc0 = _mm_crc32_u64(crc0, crc1);
+ crc1 = _mm_crc32_u64(crc2, crc3);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key48(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, k5, crc0, crc1, crc2, crc3;
+
+ k0 = k[0];
+ k2 = k[2];
+ k5 = k[5];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc2 = _mm_crc32_u64(k2, k[3]);
+ crc3 = _mm_crc32_u64(k2 >> 32, k[4]);
+
+ crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+ crc1 = _mm_crc32_u64(crc3, k5);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key56(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
+
+ k0 = k[0];
+ k2 = k[2];
+ k5 = k[5];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc2 = _mm_crc32_u64(k2, k[3]);
+ crc3 = _mm_crc32_u64(k2 >> 32, k[4]);
+
+ crc4 = _mm_crc32_u64(k5, k[6]);
+ crc5 = k5 >> 32;
+
+ crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+ crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+static inline uint64_t
+hash_crc_key64(void *key, __rte_unused uint32_t key_size, uint64_t seed)
+{
+ uint64_t *k = key;
+ uint64_t k0, k2, k5, crc0, crc1, crc2, crc3, crc4, crc5;
+
+ k0 = k[0];
+ k2 = k[2];
+ k5 = k[5];
+
+ crc0 = _mm_crc32_u64(k0, seed);
+ crc1 = _mm_crc32_u64(k0 >> 32, k[1]);
+
+ crc2 = _mm_crc32_u64(k2, k[3]);
+ crc3 = _mm_crc32_u64(k2 >> 32, k[4]);
+
+ crc4 = _mm_crc32_u64(k5, k[6]);
+ crc5 = _mm_crc32_u64(k5 >> 32, k[7]);
+
+ crc0 = _mm_crc32_u64(crc0, (crc1 << 32) ^ crc2);
+ crc1 = _mm_crc32_u64(crc3, (crc4 << 32) ^ crc5);
+
+ crc0 ^= crc1;
+
+ return crc0;
+}
+
+#define hash_default_key8 hash_crc_key8
+#define hash_default_key16 hash_crc_key16
+#define hash_default_key24 hash_crc_key24
+#define hash_default_key32 hash_crc_key32
+#define hash_default_key40 hash_crc_key40
+#define hash_default_key48 hash_crc_key48
+#define hash_default_key56 hash_crc_key56
+#define hash_default_key64 hash_crc_key64
+
+#else
+
+#define hash_default_key8 hash_xor_key8
+#define hash_default_key16 hash_xor_key16
+#define hash_default_key24 hash_xor_key24
+#define hash_default_key32 hash_xor_key32
+#define hash_default_key40 hash_xor_key40
+#define hash_default_key48 hash_xor_key48
+#define hash_default_key56 hash_xor_key56
+#define hash_default_key64 hash_xor_key64
+
+#endif
+
+#endif
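
For readers browsing this header in isolation, a minimal standalone sketch of how one of these signature functions is invoked on a 16-byte key. The key contents and seed are illustrative only, and __rte_unused is stubbed so the snippet builds outside a DPDK tree; the function body simply restates the XOR-fold scheme of hash_xor_key16() above.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#ifndef __rte_unused
#define __rte_unused __attribute__((unused))
#endif

/* Same folding scheme as hash_xor_key16() above: XOR the two 64-bit
 * words of the key with the seed, then fold the upper 32 bits of the
 * result into the lower 32 bits. */
static inline uint64_t
hash_xor_key16(void *key, __rte_unused uint32_t key_size, uint64_t seed)
{
	uint64_t *k = key;
	uint64_t xor0 = (k[0] ^ seed) ^ k[1];

	return (xor0 >> 32) ^ xor0;
}

int main(void)
{
	/* Illustrative 16-byte key, stored as two 64-bit words. */
	uint64_t key[2] = { 0x0123456789abcdefULL, 0xfedcba9876543210ULL };
	uint64_t sig = hash_xor_key16(key, sizeof(key), 0xdeadbeefULL);

	printf("signature = 0x%016" PRIx64 "\n", sig);
	return 0;
}
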
diff --git a/examples/ip_pipeline/pipeline/pipeline_actions_common.h b/examples/ip_pipeline/pipeline/pipeline_actions_common.h
new file mode 100644
index 00000000..ab08612d
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_actions_common.h
@@ -0,0 +1,231 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __INCLUDE_PIPELINE_ACTIONS_COMMON_H__
+#define __INCLUDE_PIPELINE_ACTIONS_COMMON_H__
+
+#include <stdint.h>
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_mbuf.h>
+#include <rte_pipeline.h>
+
+#define PIPELINE_PORT_IN_AH(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ __rte_unused struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint32_t n_pkts, \
+ void *arg) \
+{ \
+ uint32_t i; \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \
+ f_pkt4_work(&pkts[i], arg); \
+ \
+ for ( ; i < n_pkts; i++) \
+ f_pkt_work(pkts[i], arg); \
+ \
+ return 0; \
+}
+
+#define PIPELINE_PORT_IN_AH_HIJACK_ALL(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint32_t n_pkts, \
+ void *arg) \
+{ \
+ uint64_t pkt_mask = RTE_LEN2MASK(n_pkts, uint64_t); \
+ uint32_t i; \
+ \
+ rte_pipeline_ah_packet_hijack(p, pkt_mask); \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \
+ f_pkt4_work(&pkts[i], arg); \
+ \
+ for ( ; i < n_pkts; i++) \
+ f_pkt_work(pkts[i], arg); \
+ \
+ return 0; \
+}
+
+#define PIPELINE_TABLE_AH_HIT(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ __rte_unused struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint64_t pkts_in_mask, \
+ struct rte_pipeline_table_entry **entries, \
+ void *arg) \
+{ \
+ if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \
+ uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \
+ uint32_t i; \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \
+ f_pkt4_work(&pkts[i], &entries[i], arg); \
+ \
+ for ( ; i < n_pkts; i++) \
+ f_pkt_work(pkts[i], entries[i], arg); \
+ } else \
+ for ( ; pkts_in_mask; ) { \
+ uint32_t pos = __builtin_ctzll(pkts_in_mask); \
+ uint64_t pkt_mask = 1LLU << pos; \
+ \
+ pkts_in_mask &= ~pkt_mask; \
+ f_pkt_work(pkts[pos], entries[pos], arg); \
+ } \
+ \
+ return 0; \
+}
+
+#define PIPELINE_TABLE_AH_MISS(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ __rte_unused struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint64_t pkts_in_mask, \
+ struct rte_pipeline_table_entry *entry, \
+ void *arg) \
+{ \
+ if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \
+ uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \
+ uint32_t i; \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) \
+ f_pkt4_work(&pkts[i], entry, arg); \
+ \
+ for ( ; i < n_pkts; i++) \
+ f_pkt_work(pkts[i], entry, arg); \
+ } else \
+ for ( ; pkts_in_mask; ) { \
+ uint32_t pos = __builtin_ctzll(pkts_in_mask); \
+ uint64_t pkt_mask = 1LLU << pos; \
+ \
+ pkts_in_mask &= ~pkt_mask; \
+ f_pkt_work(pkts[pos], entry, arg); \
+ } \
+ \
+ return 0; \
+}
+
+#define PIPELINE_TABLE_AH_HIT_DROP_TIME(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint64_t pkts_mask, \
+ struct rte_pipeline_table_entry **entries, \
+ void *arg) \
+{ \
+ uint64_t pkts_in_mask = pkts_mask; \
+ uint64_t pkts_out_mask = pkts_mask; \
+ uint64_t time = rte_rdtsc(); \
+ \
+ if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \
+ uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \
+ uint32_t i; \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) { \
+ uint64_t mask = f_pkt4_work(&pkts[i], \
+ &entries[i], arg, time); \
+ pkts_out_mask ^= mask << i; \
+ } \
+ \
+ for ( ; i < n_pkts; i++) { \
+ uint64_t mask = f_pkt_work(pkts[i], \
+ entries[i], arg, time); \
+ pkts_out_mask ^= mask << i; \
+ } \
+ } else \
+ for ( ; pkts_in_mask; ) { \
+ uint32_t pos = __builtin_ctzll(pkts_in_mask); \
+ uint64_t pkt_mask = 1LLU << pos; \
+ uint64_t mask = f_pkt_work(pkts[pos], \
+ entries[pos], arg, time); \
+ \
+ pkts_in_mask &= ~pkt_mask; \
+ pkts_out_mask ^= mask << pos; \
+ } \
+ \
+ rte_pipeline_ah_packet_drop(p, pkts_out_mask ^ pkts_mask); \
+ \
+ return 0; \
+}
+
+#define PIPELINE_TABLE_AH_MISS_DROP_TIME(f_ah, f_pkt_work, f_pkt4_work) \
+static int \
+f_ah( \
+ struct rte_pipeline *p, \
+ struct rte_mbuf **pkts, \
+ uint64_t pkts_mask, \
+ struct rte_pipeline_table_entry *entry, \
+ void *arg) \
+{ \
+ uint64_t pkts_in_mask = pkts_mask; \
+ uint64_t pkts_out_mask = pkts_mask; \
+ uint64_t time = rte_rdtsc(); \
+ \
+ if ((pkts_in_mask & (pkts_in_mask + 1)) == 0) { \
+ uint64_t n_pkts = __builtin_popcountll(pkts_in_mask); \
+ uint32_t i; \
+ \
+ for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4) { \
+ uint64_t mask = f_pkt4_work(&pkts[i], \
+ entry, arg, time); \
+ pkts_out_mask ^= mask << i; \
+ } \
+ \
+ for ( ; i < n_pkts; i++) { \
+ uint64_t mask = f_pkt_work(pkts[i], entry, arg, time);\
+ pkts_out_mask ^= mask << i; \
+ } \
+ } else \
+ for ( ; pkts_in_mask; ) { \
+ uint32_t pos = __builtin_ctzll(pkts_in_mask); \
+ uint64_t pkt_mask = 1LLU << pos; \
+ uint64_t mask = f_pkt_work(pkts[pos], \
+ entry, arg, time); \
+ \
+ pkts_in_mask &= ~pkt_mask; \
+ pkts_out_mask ^= mask << pos; \
+ } \
+ \
+ rte_pipeline_ah_packet_drop(p, pkts_out_mask ^ pkts_mask); \
+ \
+ return 0; \
+}
+
+#endif
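
A hedged, standalone illustration of how the PIPELINE_PORT_IN_AH macro is meant to be used: the stub rte_pipeline/rte_mbuf types, the pkt_work/pkt4_work functions and the generated handler name are all invented for this example, and the macro body is repeated only so the snippet compiles outside a DPDK build.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#ifndef __rte_unused
#define __rte_unused __attribute__((unused))
#endif

/* Stub types standing in for the DPDK structures, so the macro usage
 * can be compiled and exercised outside of a DPDK build. */
struct rte_pipeline;
struct rte_mbuf { uint32_t pkt_id; };

/* Same shape as PIPELINE_PORT_IN_AH above: generate a port-in action
 * handler that processes packets four at a time, then one by one. */
#define PIPELINE_PORT_IN_AH(f_ah, f_pkt_work, f_pkt4_work)	\
static int							\
f_ah(__rte_unused struct rte_pipeline *p,			\
	struct rte_mbuf **pkts,					\
	uint32_t n_pkts,					\
	void *arg)						\
{								\
	uint32_t i;						\
								\
	for (i = 0; i < (n_pkts & (~0x3LLU)); i += 4)		\
		f_pkt4_work(&pkts[i], arg);			\
	for ( ; i < n_pkts; i++)				\
		f_pkt_work(pkts[i], arg);			\
	return 0;						\
}

static void
pkt_work(struct rte_mbuf *pkt, __rte_unused void *arg)
{
	printf("work on packet %" PRIu32 "\n", pkt->pkt_id);
}

static void
pkt4_work(struct rte_mbuf **pkts, void *arg)
{
	uint32_t i;

	for (i = 0; i < 4; i++)
		pkt_work(pkts[i], arg);
}

/* Expands to: static int port_in_ah_example(...) */
PIPELINE_PORT_IN_AH(port_in_ah_example, pkt_work, pkt4_work)

int main(void)
{
	struct rte_mbuf mbufs[6] = { {0}, {1}, {2}, {3}, {4}, {5} };
	struct rte_mbuf *pkts[6];
	uint32_t i;

	for (i = 0; i < 6; i++)
		pkts[i] = &mbufs[i];

	/* 6 packets: one 4-packet batch, then 2 single-packet calls. */
	return port_in_ah_example(NULL, pkts, 6, NULL);
}
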
diff --git a/examples/ip_pipeline/pipeline/pipeline_common_be.c b/examples/ip_pipeline/pipeline/pipeline_common_be.c
new file mode 100644
index 00000000..50dcb694
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_common_be.c
@@ -0,0 +1,206 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_malloc.h>
+
+#include "pipeline_common_be.h"
+
+void *
+pipeline_msg_req_ping_handler(__rte_unused struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_msg_rsp *rsp = msg;
+
+ rsp->status = 0; /* OK */
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_stats_port_in_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_stats_msg_req *req = msg;
+ struct pipeline_stats_port_in_msg_rsp *rsp = msg;
+ uint32_t port_id;
+
+ /* Check request */
+ if (req->id >= p->n_ports_in) {
+ rsp->status = -1;
+ return rsp;
+ }
+ port_id = p->port_in_id[req->id];
+
+ /* Process request */
+ rsp->status = rte_pipeline_port_in_stats_read(p->p,
+ port_id,
+ &rsp->stats,
+ 1);
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_stats_port_out_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_stats_msg_req *req = msg;
+ struct pipeline_stats_port_out_msg_rsp *rsp = msg;
+ uint32_t port_id;
+
+ /* Check request */
+ if (req->id >= p->n_ports_out) {
+ rsp->status = -1;
+ return rsp;
+ }
+ port_id = p->port_out_id[req->id];
+
+ /* Process request */
+ rsp->status = rte_pipeline_port_out_stats_read(p->p,
+ port_id,
+ &rsp->stats,
+ 1);
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_stats_table_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_stats_msg_req *req = msg;
+ struct pipeline_stats_table_msg_rsp *rsp = msg;
+ uint32_t table_id;
+
+ /* Check request */
+ if (req->id >= p->n_tables) {
+ rsp->status = -1;
+ return rsp;
+ }
+ table_id = p->table_id[req->id];
+
+ /* Process request */
+ rsp->status = rte_pipeline_table_stats_read(p->p,
+ table_id,
+ &rsp->stats,
+ 1);
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_port_in_enable_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_port_in_msg_req *req = msg;
+ struct pipeline_msg_rsp *rsp = msg;
+ uint32_t port_id;
+
+ /* Check request */
+ if (req->port_id >= p->n_ports_in) {
+ rsp->status = -1;
+ return rsp;
+ }
+ port_id = p->port_in_id[req->port_id];
+
+ /* Process request */
+ rsp->status = rte_pipeline_port_in_enable(p->p,
+ port_id);
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_port_in_disable_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_port_in_msg_req *req = msg;
+ struct pipeline_msg_rsp *rsp = msg;
+ uint32_t port_id;
+
+ /* Check request */
+ if (req->port_id >= p->n_ports_in) {
+ rsp->status = -1;
+ return rsp;
+ }
+ port_id = p->port_in_id[req->port_id];
+
+ /* Process request */
+ rsp->status = rte_pipeline_port_in_disable(p->p,
+ port_id);
+
+ return rsp;
+}
+
+void *
+pipeline_msg_req_invalid_handler(__rte_unused struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_msg_rsp *rsp = msg;
+
+ rsp->status = -1; /* Error */
+
+ return rsp;
+}
+
+int
+pipeline_msg_req_handle(struct pipeline *p)
+{
+ uint32_t msgq_id;
+
+ for (msgq_id = 0; msgq_id < p->n_msgq; msgq_id++) {
+ for ( ; ; ) {
+ struct pipeline_msg_req *req;
+ pipeline_msg_req_handler f_handle;
+
+ req = pipeline_msg_recv(p, msgq_id);
+ if (req == NULL)
+ break;
+
+ f_handle = (req->type < PIPELINE_MSG_REQS) ?
+ p->handlers[req->type] :
+ pipeline_msg_req_invalid_handler;
+
+ if (f_handle == NULL)
+ f_handle = pipeline_msg_req_invalid_handler;
+
+ pipeline_msg_send(p,
+ msgq_id,
+ f_handle(p, (void *) req));
+ }
+ }
+
+ return 0;
+}
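
A standalone sketch of the dispatch pattern used by pipeline_msg_req_handle() above: requests are routed through a handler table indexed by request type, with an invalid-request handler as the fallback for out-of-range or unregistered types. The enum, table and handlers here are illustrative only.

#include <stdio.h>

enum req_type { REQ_PING = 0, REQ_STATS, REQ_MAX };

typedef int (*req_handler)(void);

static int handle_ping(void)    { printf("ping\n");    return 0; }
static int handle_invalid(void) { printf("invalid\n"); return -1; }

static req_handler handlers[REQ_MAX] = {
	[REQ_PING] = handle_ping,
	/* REQ_STATS intentionally left unregistered (NULL). */
};

static int
dispatch(unsigned int type)
{
	/* Out-of-range types fall straight to the invalid handler ... */
	req_handler f = (type < REQ_MAX) ? handlers[type] : handle_invalid;

	/* ... and so do NULL table slots. */
	if (f == NULL)
		f = handle_invalid;

	return f();
}

int main(void)
{
	dispatch(REQ_PING);	/* registered handler */
	dispatch(REQ_STATS);	/* NULL slot -> invalid */
	dispatch(42);		/* out of range -> invalid */
	return 0;
}
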
diff --git a/examples/ip_pipeline/pipeline/pipeline_common_be.h b/examples/ip_pipeline/pipeline/pipeline_common_be.h
new file mode 100644
index 00000000..07fdca09
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_common_be.h
@@ -0,0 +1,163 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_COMMON_BE_H__
+#define __INCLUDE_PIPELINE_COMMON_BE_H__
+
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_pipeline.h>
+
+#include "pipeline_be.h"
+
+struct pipeline;
+
+enum pipeline_msg_req_type {
+ PIPELINE_MSG_REQ_PING = 0,
+ PIPELINE_MSG_REQ_STATS_PORT_IN,
+ PIPELINE_MSG_REQ_STATS_PORT_OUT,
+ PIPELINE_MSG_REQ_STATS_TABLE,
+ PIPELINE_MSG_REQ_PORT_IN_ENABLE,
+ PIPELINE_MSG_REQ_PORT_IN_DISABLE,
+ PIPELINE_MSG_REQ_CUSTOM,
+ PIPELINE_MSG_REQS
+};
+
+typedef void *(*pipeline_msg_req_handler)(struct pipeline *p, void *msg);
+
+struct pipeline {
+ struct rte_pipeline *p;
+ uint32_t port_in_id[PIPELINE_MAX_PORT_IN];
+ uint32_t port_out_id[PIPELINE_MAX_PORT_OUT];
+ uint32_t table_id[PIPELINE_MAX_TABLES];
+ struct rte_ring *msgq_in[PIPELINE_MAX_MSGQ_IN];
+ struct rte_ring *msgq_out[PIPELINE_MAX_MSGQ_OUT];
+
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+ uint32_t n_tables;
+ uint32_t n_msgq;
+
+ pipeline_msg_req_handler handlers[PIPELINE_MSG_REQS];
+ char name[PIPELINE_NAME_SIZE];
+ uint32_t log_level;
+};
+
+enum pipeline_log_level {
+ PIPELINE_LOG_LEVEL_HIGH = 1,
+ PIPELINE_LOG_LEVEL_LOW,
+ PIPELINE_LOG_LEVELS
+};
+
+#define PLOG(p, level, fmt, ...) \
+do { \
+ if (p->log_level >= PIPELINE_LOG_LEVEL_ ## level) \
+ fprintf(stdout, "[%s] " fmt "\n", p->name, ## __VA_ARGS__);\
+} while (0)
+
+static inline void *
+pipeline_msg_recv(struct pipeline *p,
+ uint32_t msgq_id)
+{
+ struct rte_ring *r = p->msgq_in[msgq_id];
+ void *msg;
+ int status = rte_ring_sc_dequeue(r, &msg);
+
+ if (status != 0)
+ return NULL;
+
+ return msg;
+}
+
+static inline void
+pipeline_msg_send(struct pipeline *p,
+ uint32_t msgq_id,
+ void *msg)
+{
+ struct rte_ring *r = p->msgq_out[msgq_id];
+ int status;
+
+ do {
+ status = rte_ring_sp_enqueue(r, msg);
+ } while (status == -ENOBUFS);
+}
+
+struct pipeline_msg_req {
+ enum pipeline_msg_req_type type;
+};
+
+struct pipeline_stats_msg_req {
+ enum pipeline_msg_req_type type;
+ uint32_t id;
+};
+
+struct pipeline_port_in_msg_req {
+ enum pipeline_msg_req_type type;
+ uint32_t port_id;
+};
+
+struct pipeline_custom_msg_req {
+ enum pipeline_msg_req_type type;
+ uint32_t subtype;
+};
+
+struct pipeline_msg_rsp {
+ int status;
+};
+
+struct pipeline_stats_port_in_msg_rsp {
+ int status;
+ struct rte_pipeline_port_in_stats stats;
+};
+
+struct pipeline_stats_port_out_msg_rsp {
+ int status;
+ struct rte_pipeline_port_out_stats stats;
+};
+
+struct pipeline_stats_table_msg_rsp {
+ int status;
+ struct rte_pipeline_table_stats stats;
+};
+
+void *pipeline_msg_req_ping_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_stats_port_in_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_stats_port_out_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_stats_table_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_port_in_enable_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_port_in_disable_handler(struct pipeline *p, void *msg);
+void *pipeline_msg_req_invalid_handler(struct pipeline *p, void *msg);
+
+int pipeline_msg_req_handle(struct pipeline *p);
+
+#endif
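
A minimal mock, under simplified message layouts, of the in-place request/response convention these declarations imply: the same buffer carries the request into the handler and is rewritten as the response on the way out, so it must be sized for the largest message. The types and the 64-byte size here are illustrative stand-ins for the structures above.

#include <stdio.h>
#include <stdlib.h>

enum msg_type { MSG_REQ_PING = 0 };

struct msg_req { enum msg_type type; };
struct msg_rsp { int status; };

static void *
msg_req_ping_handler(void *msg)
{
	/* Reinterpret the request buffer as the response. */
	struct msg_rsp *rsp = msg;

	rsp->status = 0;	/* OK */
	return rsp;
}

int main(void)
{
	/* Buffer must be large enough for the largest request/response. */
	void *buf = malloc(64);
	struct msg_req *req = buf;
	struct msg_rsp *rsp;

	if (buf == NULL)
		return 1;

	req->type = MSG_REQ_PING;
	rsp = msg_req_ping_handler(req);
	printf("ping status = %d\n", rsp->status);

	free(buf);
	return 0;
}
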
diff --git a/examples/ip_pipeline/pipeline/pipeline_common_fe.c b/examples/ip_pipeline/pipeline/pipeline_common_fe.c
new file mode 100644
index 00000000..a691d422
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_common_fe.c
@@ -0,0 +1,1310 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_malloc.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "pipeline_common_fe.h"
+
+int
+app_pipeline_ping(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_msg_req *req;
+ struct pipeline_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if (p == NULL)
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_PING;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_pipeline_stats_port_in(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id,
+ struct rte_pipeline_port_in_stats *stats)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_stats_msg_req *req;
+ struct pipeline_stats_port_in_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (stats == NULL))
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if ((p == NULL) ||
+ (port_id >= p->n_pktq_in))
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_STATS_PORT_IN;
+ req->id = port_id;
+
+ /* Send request and wait for response */
+ rsp = (struct pipeline_stats_port_in_msg_rsp *)
+ app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+ if (status == 0)
+ memcpy(stats, &rsp->stats, sizeof(rsp->stats));
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_pipeline_stats_port_out(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id,
+ struct rte_pipeline_port_out_stats *stats)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_stats_msg_req *req;
+ struct pipeline_stats_port_out_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (pipeline_id >= app->n_pipelines) ||
+ (stats == NULL))
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if ((p == NULL) ||
+ (port_id >= p->n_pktq_out))
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_STATS_PORT_OUT;
+ req->id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+ if (status == 0)
+ memcpy(stats, &rsp->stats, sizeof(rsp->stats));
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_pipeline_stats_table(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t table_id,
+ struct rte_pipeline_table_stats *stats)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_stats_msg_req *req;
+ struct pipeline_stats_table_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (stats == NULL))
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if (p == NULL)
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_STATS_TABLE;
+ req->id = table_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+ if (status == 0)
+ memcpy(stats, &rsp->stats, sizeof(rsp->stats));
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_pipeline_port_in_enable(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_port_in_msg_req *req;
+ struct pipeline_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if ((p == NULL) ||
+ (port_id >= p->n_pktq_in))
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_PORT_IN_ENABLE;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_pipeline_port_in_disable(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct app_pipeline_params *p;
+ struct pipeline_port_in_msg_req *req;
+ struct pipeline_msg_rsp *rsp;
+ int status = 0;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", pipeline_id, p);
+ if ((p == NULL) ||
+ (port_id >= p->n_pktq_in))
+ return -1;
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ /* Fill in request */
+ req->type = PIPELINE_MSG_REQ_PORT_IN_DISABLE;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Check response */
+ status = rsp->status;
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+
+ return status;
+}
+
+int
+app_link_config(struct app_params *app,
+ uint32_t link_id,
+ uint32_t ip,
+ uint32_t depth)
+{
+ struct app_link_params *p;
+ uint32_t i, netmask, host, bcast;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p);
+ if (p == NULL) {
+ APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link",
+ link_id);
+ return -1;
+ }
+
+ if (p->state) {
+ APP_LOG(app, HIGH, "%s is UP, please bring it DOWN first",
+ p->name);
+ return -1;
+ }
+
+ if ((depth == 0) || (depth > 32)) {
+ APP_LOG(app, HIGH, "Illegal value for depth parameter "
+ "(%" PRIu32 ")",
+ depth);
+ return -1;
+ }
+
+ netmask = (~0U) << (32 - depth);
+ host = ip & netmask;
+ bcast = host | (~netmask);
+
+ if ((ip == 0) ||
+ (ip == UINT32_MAX) ||
+ (ip == host) ||
+ (ip == bcast)) {
+ APP_LOG(app, HIGH, "Illegal IP address");
+ return -1;
+ }
+
+ for (i = 0; i < app->n_links; i++) {
+ struct app_link_params *link = &app->link_params[i];
+
+ if (strcmp(p->name, link->name) == 0)
+ continue;
+
+ if (link->ip == ip) {
+ APP_LOG(app, HIGH,
+ "%s is already assigned this IP address",
+ link->name);
+ return -1;
+ }
+ }
+
+ /* Save link parameters */
+ p->ip = ip;
+ p->depth = depth;
+
+ return 0;
+}
+
+int
+app_link_up(struct app_params *app,
+ uint32_t link_id)
+{
+ struct app_link_params *p;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p);
+ if (p == NULL) {
+ APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link",
+ link_id);
+ return -1;
+ }
+
+ /* Check link state */
+ if (p->state) {
+ APP_LOG(app, HIGH, "%s is already UP", p->name);
+ return 0;
+ }
+
+ /* Check that IP address is valid */
+ if (p->ip == 0) {
+ APP_LOG(app, HIGH, "%s IP address is not set", p->name);
+ return 0;
+ }
+
+ app_link_up_internal(app, p);
+
+ return 0;
+}
+
+int
+app_link_down(struct app_params *app,
+ uint32_t link_id)
+{
+ struct app_link_params *p;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p);
+ if (p == NULL) {
+ APP_LOG(app, HIGH, "LINK%" PRIu32 " is not a valid link",
+ link_id);
+ return -1;
+ }
+
+ /* Check link state */
+ if (p->state == 0) {
+ APP_LOG(app, HIGH, "%s is already DOWN", p->name);
+ return 0;
+ }
+
+ app_link_down_internal(app, p);
+
+ return 0;
+}
+
+/*
+ * ping
+ */
+
+struct cmd_ping_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t ping_string;
+};
+
+static void
+cmd_ping_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_ping_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_ping(app, params->pipeline_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_ping_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_ping_result, p_string, "p");
+
+cmdline_parse_token_num_t cmd_ping_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_ping_result, pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_ping_ping_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_ping_result, ping_string, "ping");
+
+cmdline_parse_inst_t cmd_ping = {
+ .f = cmd_ping_parsed,
+ .data = NULL,
+ .help_str = "Pipeline ping",
+ .tokens = {
+ (void *) &cmd_ping_p_string,
+ (void *) &cmd_ping_pipeline_id,
+ (void *) &cmd_ping_ping_string,
+ NULL,
+ },
+};
+
+/*
+ * stats port in
+ */
+
+struct cmd_stats_port_in_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t port_string;
+ cmdline_fixed_string_t in_string;
+ uint32_t port_in_id;
+};
+
+static void
+cmd_stats_port_in_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_stats_port_in_result *params = parsed_result;
+ struct app_params *app = data;
+ struct rte_pipeline_port_in_stats stats;
+ int status;
+
+ status = app_pipeline_stats_port_in(app,
+ params->pipeline_id,
+ params->port_in_id,
+ &stats);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ /* Display stats */
+ printf("Pipeline %" PRIu32 " - stats for input port %" PRIu32 ":\n"
+ "\tPkts in: %" PRIu64 "\n"
+ "\tPkts dropped by AH: %" PRIu64 "\n"
+ "\tPkts dropped by other: %" PRIu64 "\n",
+ params->pipeline_id,
+ params->port_in_id,
+ stats.stats.n_pkts_in,
+ stats.n_pkts_dropped_by_ah,
+ stats.stats.n_pkts_drop);
+}
+
+cmdline_parse_token_string_t cmd_stats_port_in_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_stats_port_in_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_port_in_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_stats_port_in_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, stats_string,
+ "stats");
+
+cmdline_parse_token_string_t cmd_stats_port_in_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, port_string,
+ "port");
+
+cmdline_parse_token_string_t cmd_stats_port_in_in_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_in_result, in_string,
+ "in");
+
+cmdline_parse_token_num_t cmd_stats_port_in_port_in_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_port_in_result, port_in_id,
+ UINT32);
+
+cmdline_parse_inst_t cmd_stats_port_in = {
+ .f = cmd_stats_port_in_parsed,
+ .data = NULL,
+ .help_str = "Pipeline input port stats",
+ .tokens = {
+ (void *) &cmd_stats_port_in_p_string,
+ (void *) &cmd_stats_port_in_pipeline_id,
+ (void *) &cmd_stats_port_in_stats_string,
+ (void *) &cmd_stats_port_in_port_string,
+ (void *) &cmd_stats_port_in_in_string,
+ (void *) &cmd_stats_port_in_port_in_id,
+ NULL,
+ },
+};
+
+/*
+ * stats port out
+ */
+
+struct cmd_stats_port_out_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t port_string;
+ cmdline_fixed_string_t out_string;
+ uint32_t port_out_id;
+};
+
+static void
+cmd_stats_port_out_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+
+ struct cmd_stats_port_out_result *params = parsed_result;
+ struct app_params *app = data;
+ struct rte_pipeline_port_out_stats stats;
+ int status;
+
+ status = app_pipeline_stats_port_out(app,
+ params->pipeline_id,
+ params->port_out_id,
+ &stats);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ /* Display stats */
+ printf("Pipeline %" PRIu32 " - stats for output port %" PRIu32 ":\n"
+ "\tPkts in: %" PRIu64 "\n"
+ "\tPkts dropped by AH: %" PRIu64 "\n"
+ "\tPkts dropped by other: %" PRIu64 "\n",
+ params->pipeline_id,
+ params->port_out_id,
+ stats.stats.n_pkts_in,
+ stats.n_pkts_dropped_by_ah,
+ stats.stats.n_pkts_drop);
+}
+
+cmdline_parse_token_string_t cmd_stats_port_out_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_stats_port_out_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_port_out_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_stats_port_out_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, stats_string,
+ "stats");
+
+cmdline_parse_token_string_t cmd_stats_port_out_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, port_string,
+ "port");
+
+cmdline_parse_token_string_t cmd_stats_port_out_out_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_port_out_result, out_string,
+ "out");
+
+cmdline_parse_token_num_t cmd_stats_port_out_port_out_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_port_out_result, port_out_id,
+ UINT32);
+
+cmdline_parse_inst_t cmd_stats_port_out = {
+ .f = cmd_stats_port_out_parsed,
+ .data = NULL,
+ .help_str = "Pipeline output port stats",
+ .tokens = {
+ (void *) &cmd_stats_port_out_p_string,
+ (void *) &cmd_stats_port_out_pipeline_id,
+ (void *) &cmd_stats_port_out_stats_string,
+ (void *) &cmd_stats_port_out_port_string,
+ (void *) &cmd_stats_port_out_out_string,
+ (void *) &cmd_stats_port_out_port_out_id,
+ NULL,
+ },
+};
+
+/*
+ * stats table
+ */
+
+struct cmd_stats_table_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t table_string;
+ uint32_t table_id;
+};
+
+static void
+cmd_stats_table_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_stats_table_result *params = parsed_result;
+ struct app_params *app = data;
+ struct rte_pipeline_table_stats stats;
+ int status;
+
+ status = app_pipeline_stats_table(app,
+ params->pipeline_id,
+ params->table_id,
+ &stats);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ /* Display stats */
+ printf("Pipeline %" PRIu32 " - stats for table %" PRIu32 ":\n"
+ "\tPkts in: %" PRIu64 "\n"
+ "\tPkts in with lookup miss: %" PRIu64 "\n"
+ "\tPkts in with lookup hit dropped by AH: %" PRIu64 "\n"
+ "\tPkts in with lookup hit dropped by others: %" PRIu64 "\n"
+ "\tPkts in with lookup miss dropped by AH: %" PRIu64 "\n"
+ "\tPkts in with lookup miss dropped by others: %" PRIu64 "\n",
+ params->pipeline_id,
+ params->table_id,
+ stats.stats.n_pkts_in,
+ stats.stats.n_pkts_lookup_miss,
+ stats.n_pkts_dropped_by_lkp_hit_ah,
+ stats.n_pkts_dropped_lkp_hit,
+ stats.n_pkts_dropped_by_lkp_miss_ah,
+ stats.n_pkts_dropped_lkp_miss);
+}
+
+cmdline_parse_token_string_t cmd_stats_table_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_stats_table_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_table_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_stats_table_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, stats_string,
+ "stats");
+
+cmdline_parse_token_string_t cmd_stats_table_table_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_stats_table_result, table_string,
+ "table");
+
+cmdline_parse_token_num_t cmd_stats_table_table_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_stats_table_result, table_id, UINT32);
+
+cmdline_parse_inst_t cmd_stats_table = {
+ .f = cmd_stats_table_parsed,
+ .data = NULL,
+ .help_str = "Pipeline table stats",
+ .tokens = {
+ (void *) &cmd_stats_table_p_string,
+ (void *) &cmd_stats_table_pipeline_id,
+ (void *) &cmd_stats_table_stats_string,
+ (void *) &cmd_stats_table_table_string,
+ (void *) &cmd_stats_table_table_id,
+ NULL,
+ },
+};
+
+/*
+ * port in enable
+ */
+
+struct cmd_port_in_enable_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t port_string;
+ cmdline_fixed_string_t in_string;
+ uint32_t port_in_id;
+ cmdline_fixed_string_t enable_string;
+};
+
+static void
+cmd_port_in_enable_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_port_in_enable_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_port_in_enable(app,
+ params->pipeline_id,
+ params->port_in_id);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_port_in_enable_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_port_in_enable_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_port_in_enable_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_port_in_enable_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, port_string,
+ "port");
+
+cmdline_parse_token_string_t cmd_port_in_enable_in_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result, in_string,
+ "in");
+
+cmdline_parse_token_num_t cmd_port_in_enable_port_in_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_port_in_enable_result, port_in_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_port_in_enable_enable_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_enable_result,
+ enable_string, "enable");
+
+cmdline_parse_inst_t cmd_port_in_enable = {
+ .f = cmd_port_in_enable_parsed,
+ .data = NULL,
+ .help_str = "Pipeline input port enable",
+ .tokens = {
+ (void *) &cmd_port_in_enable_p_string,
+ (void *) &cmd_port_in_enable_pipeline_id,
+ (void *) &cmd_port_in_enable_port_string,
+ (void *) &cmd_port_in_enable_in_string,
+ (void *) &cmd_port_in_enable_port_in_id,
+ (void *) &cmd_port_in_enable_enable_string,
+ NULL,
+ },
+};
+
+/*
+ * port in disable
+ */
+
+struct cmd_port_in_disable_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t port_string;
+ cmdline_fixed_string_t in_string;
+ uint32_t port_in_id;
+ cmdline_fixed_string_t disable_string;
+};
+
+static void
+cmd_port_in_disable_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_port_in_disable_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_port_in_disable(app,
+ params->pipeline_id,
+ params->port_in_id);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_port_in_disable_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_port_in_disable_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_port_in_disable_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_port_in_disable_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, port_string,
+ "port");
+
+cmdline_parse_token_string_t cmd_port_in_disable_in_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result, in_string,
+ "in");
+
+cmdline_parse_token_num_t cmd_port_in_disable_port_in_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_port_in_disable_result, port_in_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_port_in_disable_disable_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_port_in_disable_result,
+ disable_string, "disable");
+
+cmdline_parse_inst_t cmd_port_in_disable = {
+ .f = cmd_port_in_disable_parsed,
+ .data = NULL,
+ .help_str = "Pipeline input port disable",
+ .tokens = {
+ (void *) &cmd_port_in_disable_p_string,
+ (void *) &cmd_port_in_disable_pipeline_id,
+ (void *) &cmd_port_in_disable_port_string,
+ (void *) &cmd_port_in_disable_in_string,
+ (void *) &cmd_port_in_disable_port_in_id,
+ (void *) &cmd_port_in_disable_disable_string,
+ NULL,
+ },
+};
+
+/*
+ * link config
+ */
+
+static void
+print_link_info(struct app_link_params *p)
+{
+ struct rte_eth_stats stats;
+ struct ether_addr *mac_addr;
+ uint32_t netmask = (~0U) << (32 - p->depth);
+ uint32_t host = p->ip & netmask;
+ uint32_t bcast = host | (~netmask);
+
+ memset(&stats, 0, sizeof(stats));
+ rte_eth_stats_get(p->pmd_id, &stats);
+
+ mac_addr = (struct ether_addr *) &p->mac_addr;
+
+ if (strlen(p->pci_bdf))
+ printf("%s(%s): flags=<%s>\n",
+ p->name,
+ p->pci_bdf,
+ (p->state) ? "UP" : "DOWN");
+ else
+ printf("%s: flags=<%s>\n",
+ p->name,
+ (p->state) ? "UP" : "DOWN");
+
+ if (p->ip)
+ printf("\tinet %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32
+ " netmask %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32 " "
+ "broadcast %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32 "\n",
+ (p->ip >> 24) & 0xFF,
+ (p->ip >> 16) & 0xFF,
+ (p->ip >> 8) & 0xFF,
+ p->ip & 0xFF,
+ (netmask >> 24) & 0xFF,
+ (netmask >> 16) & 0xFF,
+ (netmask >> 8) & 0xFF,
+ netmask & 0xFF,
+ (bcast >> 24) & 0xFF,
+ (bcast >> 16) & 0xFF,
+ (bcast >> 8) & 0xFF,
+ bcast & 0xFF);
+
+ printf("\tether %02" PRIx32 ":%02" PRIx32 ":%02" PRIx32
+ ":%02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 "\n",
+ mac_addr->addr_bytes[0],
+ mac_addr->addr_bytes[1],
+ mac_addr->addr_bytes[2],
+ mac_addr->addr_bytes[3],
+ mac_addr->addr_bytes[4],
+ mac_addr->addr_bytes[5]);
+
+ printf("\tRX packets %" PRIu64
+ " bytes %" PRIu64
+ "\n",
+ stats.ipackets,
+ stats.ibytes);
+
+ printf("\tRX errors %" PRIu64
+ " missed %" PRIu64
+ " no-mbuf %" PRIu64
+ "\n",
+ stats.ierrors,
+ stats.imissed,
+ stats.rx_nombuf);
+
+ printf("\tTX packets %" PRIu64
+ " bytes %" PRIu64 "\n",
+ stats.opackets,
+ stats.obytes);
+
+ printf("\tTX errors %" PRIu64
+ "\n",
+ stats.oerrors);
+
+ printf("\n");
+}
+
+struct cmd_link_config_result {
+ cmdline_fixed_string_t link_string;
+ uint32_t link_id;
+ cmdline_fixed_string_t config_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+};
+
+static void
+cmd_link_config_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_link_config_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ uint32_t link_id = params->link_id;
+ uint32_t ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ uint32_t depth = params->depth;
+
+ status = app_link_config(app, link_id, ip, depth);
+ if (status)
+ printf("Command failed\n");
+ else {
+ struct app_link_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p);
+ print_link_info(p);
+ }
+}
+
+cmdline_parse_token_string_t cmd_link_config_link_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_config_result, link_string,
+ "link");
+
+cmdline_parse_token_num_t cmd_link_config_link_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_link_config_result, link_id, UINT32);
+
+cmdline_parse_token_string_t cmd_link_config_config_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_config_result, config_string,
+ "config");
+
+cmdline_parse_token_ipaddr_t cmd_link_config_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_link_config_result, ip);
+
+cmdline_parse_token_num_t cmd_link_config_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_link_config_result, depth, UINT32);
+
+cmdline_parse_inst_t cmd_link_config = {
+ .f = cmd_link_config_parsed,
+ .data = NULL,
+ .help_str = "Link configuration",
+ .tokens = {
+ (void *)&cmd_link_config_link_string,
+ (void *)&cmd_link_config_link_id,
+ (void *)&cmd_link_config_config_string,
+ (void *)&cmd_link_config_ip,
+ (void *)&cmd_link_config_depth,
+ NULL,
+ },
+};
+
+/*
+ * link up
+ */
+
+struct cmd_link_up_result {
+ cmdline_fixed_string_t link_string;
+ uint32_t link_id;
+ cmdline_fixed_string_t up_string;
+};
+
+static void
+cmd_link_up_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_link_up_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_link_up(app, params->link_id);
+ if (status != 0)
+ printf("Command failed\n");
+ else {
+ struct app_link_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", params->link_id,
+ p);
+ print_link_info(p);
+ }
+}
+
+cmdline_parse_token_string_t cmd_link_up_link_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_up_result, link_string,
+ "link");
+
+cmdline_parse_token_num_t cmd_link_up_link_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_link_up_result, link_id, UINT32);
+
+cmdline_parse_token_string_t cmd_link_up_up_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_up_result, up_string, "up");
+
+cmdline_parse_inst_t cmd_link_up = {
+ .f = cmd_link_up_parsed,
+ .data = NULL,
+ .help_str = "Link UP",
+ .tokens = {
+ (void *)&cmd_link_up_link_string,
+ (void *)&cmd_link_up_link_id,
+ (void *)&cmd_link_up_up_string,
+ NULL,
+ },
+};
+
+/*
+ * link down
+ */
+
+struct cmd_link_down_result {
+ cmdline_fixed_string_t link_string;
+ uint32_t link_id;
+ cmdline_fixed_string_t down_string;
+};
+
+static void
+cmd_link_down_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_link_down_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_link_down(app, params->link_id);
+ if (status != 0)
+ printf("Command failed\n");
+ else {
+ struct app_link_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", params->link_id,
+ p);
+ print_link_info(p);
+ }
+}
+
+cmdline_parse_token_string_t cmd_link_down_link_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_down_result, link_string,
+ "link");
+
+cmdline_parse_token_num_t cmd_link_down_link_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_link_down_result, link_id, UINT32);
+
+cmdline_parse_token_string_t cmd_link_down_down_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_down_result, down_string,
+ "down");
+
+cmdline_parse_inst_t cmd_link_down = {
+ .f = cmd_link_down_parsed,
+ .data = NULL,
+ .help_str = "Link DOWN",
+ .tokens = {
+ (void *) &cmd_link_down_link_string,
+ (void *) &cmd_link_down_link_id,
+ (void *) &cmd_link_down_down_string,
+ NULL,
+ },
+};
+
+/*
+ * link ls
+ */
+
+struct cmd_link_ls_result {
+ cmdline_fixed_string_t link_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_link_ls_parsed(
+ __attribute__((unused)) void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct app_params *app = data;
+ uint32_t link_id;
+
+ for (link_id = 0; link_id < app->n_links; link_id++) {
+ struct app_link_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->link_params, "LINK", link_id, p);
+ print_link_info(p);
+ }
+}
+
+cmdline_parse_token_string_t cmd_link_ls_link_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_ls_result, link_string,
+ "link");
+
+cmdline_parse_token_string_t cmd_link_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_link_ls_result, ls_string, "ls");
+
+cmdline_parse_inst_t cmd_link_ls = {
+ .f = cmd_link_ls_parsed,
+ .data = NULL,
+ .help_str = "Link list",
+ .tokens = {
+ (void *)&cmd_link_ls_link_string,
+ (void *)&cmd_link_ls_ls_string,
+ NULL,
+ },
+};
+
+/*
+ * quit
+ */
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void
+cmd_quit_parsed(
+ __rte_unused void *parsed_result,
+ struct cmdline *cl,
+ __rte_unused void *data)
+{
+ cmdline_quit(cl);
+}
+
+static cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+static cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed,
+ .data = NULL,
+ .help_str = "Quit",
+ .tokens = {
+ (void *) &cmd_quit_quit,
+ NULL,
+ },
+};
+
+/*
+ * run
+ */
+
+static void
+app_run_file(
+ cmdline_parse_ctx_t *ctx,
+ const char *file_name)
+{
+ struct cmdline *file_cl;
+ int fd;
+
+ fd = open(file_name, O_RDONLY);
+ if (fd < 0) {
+ printf("Cannot open file \"%s\"\n", file_name);
+ return;
+ }
+
+ file_cl = cmdline_new(ctx, "", fd, 1);
+ cmdline_interact(file_cl);
+ close(fd);
+}
+
+struct cmd_run_file_result {
+ cmdline_fixed_string_t run_string;
+ char file_name[APP_FILE_NAME_SIZE];
+};
+
+static void
+cmd_run_parsed(
+ void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_run_file_result *params = parsed_result;
+
+ app_run_file(cl->ctx, params->file_name);
+}
+
+cmdline_parse_token_string_t cmd_run_run_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_run_file_result, run_string,
+ "run");
+
+cmdline_parse_token_string_t cmd_run_file_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_run_file_result, file_name, NULL);
+
+cmdline_parse_inst_t cmd_run = {
+ .f = cmd_run_parsed,
+ .data = NULL,
+ .help_str = "Run CLI script file",
+ .tokens = {
+ (void *) &cmd_run_run_string,
+ (void *) &cmd_run_file_name,
+ NULL,
+ },
+};
+
+static cmdline_parse_ctx_t pipeline_common_cmds[] = {
+ (cmdline_parse_inst_t *) &cmd_quit,
+ (cmdline_parse_inst_t *) &cmd_run,
+
+ (cmdline_parse_inst_t *) &cmd_link_config,
+ (cmdline_parse_inst_t *) &cmd_link_up,
+ (cmdline_parse_inst_t *) &cmd_link_down,
+ (cmdline_parse_inst_t *) &cmd_link_ls,
+
+ (cmdline_parse_inst_t *) &cmd_ping,
+ (cmdline_parse_inst_t *) &cmd_stats_port_in,
+ (cmdline_parse_inst_t *) &cmd_stats_port_out,
+ (cmdline_parse_inst_t *) &cmd_stats_table,
+ (cmdline_parse_inst_t *) &cmd_port_in_enable,
+ (cmdline_parse_inst_t *) &cmd_port_in_disable,
+ NULL,
+};
+
+int
+app_pipeline_common_cmd_push(struct app_params *app)
+{
+ uint32_t n_cmds, i;
+
+ /* Check for available slots in the application commands array */
+ n_cmds = RTE_DIM(pipeline_common_cmds) - 1;
+ if (n_cmds > APP_MAX_CMDS - app->n_cmds)
+ return -ENOMEM;
+
+ /* Push pipeline commands into the application */
+ memcpy(&app->cmds[app->n_cmds],
+ pipeline_common_cmds,
+ n_cmds * sizeof(cmdline_parse_ctx_t));
+
+ for (i = 0; i < n_cmds; i++)
+ app->cmds[app->n_cmds + i]->data = app;
+
+ app->n_cmds += n_cmds;
+ app->cmds[app->n_cmds] = NULL;
+
+ return 0;
+}
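
A standalone illustration of the netmask/broadcast arithmetic performed by app_link_config() above, worked for the illustrative address 10.0.0.1/24; the address and prefix length are examples only.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

static void
print_ipv4(const char *label, uint32_t a)
{
	printf("%s %" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32 "\n",
		label,
		(a >> 24) & 0xFF,
		(a >> 16) & 0xFF,
		(a >> 8) & 0xFF,
		a & 0xFF);
}

int main(void)
{
	uint32_t ip = (10u << 24) | 1u;		/* 10.0.0.1 */
	uint32_t depth = 24;
	uint32_t netmask, host, bcast;

	if ((depth == 0) || (depth > 32))	/* validate before shifting */
		return 1;

	netmask = (~0U) << (32 - depth);
	host = ip & netmask;
	bcast = host | (~netmask);

	print_ipv4("ip:       ", ip);		/* 10.0.0.1 */
	print_ipv4("netmask:  ", netmask);	/* 255.255.255.0 */
	print_ipv4("network:  ", host);		/* 10.0.0.0 */
	print_ipv4("broadcast:", bcast);	/* 10.0.0.255 */
	return 0;
}
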
diff --git a/examples/ip_pipeline/pipeline/pipeline_common_fe.h b/examples/ip_pipeline/pipeline/pipeline_common_fe.h
new file mode 100644
index 00000000..cfad963d
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_common_fe.h
@@ -0,0 +1,234 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_COMMON_FE_H__
+#define __INCLUDE_PIPELINE_COMMON_FE_H__
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <cmdline_parse.h>
+
+#include "pipeline_common_be.h"
+#include "pipeline.h"
+#include "app.h"
+
+#ifndef MSG_TIMEOUT_DEFAULT
+#define MSG_TIMEOUT_DEFAULT 1000
+#endif
+
+static inline struct app_pipeline_data *
+app_pipeline_data(struct app_params *app, uint32_t id)
+{
+ struct app_pipeline_params *params;
+
+ APP_PARAM_FIND_BY_ID(app->pipeline_params, "PIPELINE", id, params);
+ if (params == NULL)
+ return NULL;
+
+ return &app->pipeline_data[params - app->pipeline_params];
+}
+
+static inline void *
+app_pipeline_data_fe(struct app_params *app, uint32_t id, struct pipeline_type *ptype)
+{
+ struct app_pipeline_data *pipeline_data;
+
+ pipeline_data = app_pipeline_data(app, id);
+ if (pipeline_data == NULL)
+ return NULL;
+
+ if (strcmp(pipeline_data->ptype->name, ptype->name) != 0)
+ return NULL;
+
+ if (pipeline_data->enabled == 0)
+ return NULL;
+
+ return pipeline_data->fe;
+}
+
+static inline struct rte_ring *
+app_pipeline_msgq_in_get(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_msgq_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->msgq_params,
+ "MSGQ-REQ-PIPELINE",
+ pipeline_id,
+ p);
+ if (p == NULL)
+ return NULL;
+
+ return app->msgq[p - app->msgq_params];
+}
+
+static inline struct rte_ring *
+app_pipeline_msgq_out_get(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_msgq_params *p;
+
+ APP_PARAM_FIND_BY_ID(app->msgq_params,
+ "MSGQ-RSP-PIPELINE",
+ pipeline_id,
+ p);
+ if (p == NULL)
+ return NULL;
+
+ return app->msgq[p - app->msgq_params];
+}
+
+static inline void *
+app_msg_alloc(__rte_unused struct app_params *app)
+{
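+	/* Fixed-size, cache line aligned buffer used for pipeline messages */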
+ return rte_malloc(NULL, 2048, RTE_CACHE_LINE_SIZE);
+}
+
+static inline void
+app_msg_free(__rte_unused struct app_params *app,
+ void *msg)
+{
+ rte_free(msg);
+}
+
+static inline void
+app_msg_send(struct app_params *app,
+ uint32_t pipeline_id,
+ void *msg)
+{
+ struct rte_ring *r = app_pipeline_msgq_in_get(app, pipeline_id);
+ int status;
+
+ do {
+ status = rte_ring_sp_enqueue(r, msg);
+ } while (status == -ENOBUFS);
+}
+
+static inline void *
+app_msg_recv(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct rte_ring *r = app_pipeline_msgq_out_get(app, pipeline_id);
+ void *msg;
+ int status = rte_ring_sc_dequeue(r, &msg);
+
+ if (status != 0)
+ return NULL;
+
+ return msg;
+}
+
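+/*
+ * Send a request to the pipeline and busy-wait for the response. The
+ * deadline is derived from the TSC; a timeout of 0 means wait forever.
+ * Returns the response message, or NULL if the deadline expires.
+ */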
+static inline void *
+app_msg_send_recv(struct app_params *app,
+ uint32_t pipeline_id,
+ void *msg,
+ uint32_t timeout_ms)
+{
+ struct rte_ring *r_req = app_pipeline_msgq_in_get(app, pipeline_id);
+ struct rte_ring *r_rsp = app_pipeline_msgq_out_get(app, pipeline_id);
+ uint64_t hz = rte_get_tsc_hz();
+ void *msg_recv;
+ uint64_t deadline;
+ int status;
+
+ /* send */
+ do {
+ status = rte_ring_sp_enqueue(r_req, (void *) msg);
+ } while (status == -ENOBUFS);
+
+ /* recv */
+ deadline = (timeout_ms) ?
+ (rte_rdtsc() + ((hz * timeout_ms) / 1000)) :
+ UINT64_MAX;
+
+ do {
+ if (rte_rdtsc() > deadline)
+ return NULL;
+
+ status = rte_ring_sc_dequeue(r_rsp, &msg_recv);
+ } while (status != 0);
+
+ return msg_recv;
+}
+
+int
+app_pipeline_ping(struct app_params *app,
+ uint32_t pipeline_id);
+
+int
+app_pipeline_stats_port_in(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id,
+ struct rte_pipeline_port_in_stats *stats);
+
+int
+app_pipeline_stats_port_out(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id,
+ struct rte_pipeline_port_out_stats *stats);
+
+int
+app_pipeline_stats_table(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t table_id,
+ struct rte_pipeline_table_stats *stats);
+
+int
+app_pipeline_port_in_enable(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_pipeline_port_in_disable(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_link_config(struct app_params *app,
+ uint32_t link_id,
+ uint32_t ip,
+ uint32_t depth);
+
+int
+app_link_up(struct app_params *app,
+ uint32_t link_id);
+
+int
+app_link_down(struct app_params *app,
+ uint32_t link_id);
+
+int
+app_pipeline_common_cmd_push(struct app_params *app);
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall.c b/examples/ip_pipeline/pipeline/pipeline_firewall.c
new file mode 100644
index 00000000..fd897d5c
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_firewall.c
@@ -0,0 +1,1869 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#include <rte_common.h>
+#include <rte_hexdump.h>
+#include <rte_malloc.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+#include <cmdline_socket.h>
+
+#include "app.h"
+#include "pipeline_common_fe.h"
+#include "pipeline_firewall.h"
+
+#define BUF_SIZE 1024
+
+struct app_pipeline_firewall_rule {
+ struct pipeline_firewall_key key;
+ int32_t priority;
+ uint32_t port_id;
+ void *entry_ptr;
+
+ TAILQ_ENTRY(app_pipeline_firewall_rule) node;
+};
+
+struct app_pipeline_firewall {
+ /* parameters */
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+
+ /* rules */
+ TAILQ_HEAD(, app_pipeline_firewall_rule) rules;
+ uint32_t n_rules;
+ uint32_t default_rule_present;
+ uint32_t default_rule_port_id;
+ void *default_rule_entry_ptr;
+};
+
+struct app_pipeline_add_bulk_params {
+ struct pipeline_firewall_key *keys;
+ uint32_t n_keys;
+ uint32_t *priorities;
+ uint32_t *port_ids;
+};
+
+struct app_pipeline_del_bulk_params {
+ struct pipeline_firewall_key *keys;
+ uint32_t n_keys;
+};
+
+static void
+print_firewall_ipv4_rule(struct app_pipeline_firewall_rule *rule)
+{
+ printf("Prio = %" PRId32 " (SA = %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32 "/%" PRIu32 ", "
+ "DA = %" PRIu32 ".%" PRIu32
+ ".%"PRIu32 ".%" PRIu32 "/%" PRIu32 ", "
+ "SP = %" PRIu32 "-%" PRIu32 ", "
+ "DP = %" PRIu32 "-%" PRIu32 ", "
+ "Proto = %" PRIu32 " / 0x%" PRIx32 ") => "
+ "Port = %" PRIu32 " (entry ptr = %p)\n",
+
+ rule->priority,
+
+ (rule->key.key.ipv4_5tuple.src_ip >> 24) & 0xFF,
+ (rule->key.key.ipv4_5tuple.src_ip >> 16) & 0xFF,
+ (rule->key.key.ipv4_5tuple.src_ip >> 8) & 0xFF,
+ rule->key.key.ipv4_5tuple.src_ip & 0xFF,
+ rule->key.key.ipv4_5tuple.src_ip_mask,
+
+ (rule->key.key.ipv4_5tuple.dst_ip >> 24) & 0xFF,
+ (rule->key.key.ipv4_5tuple.dst_ip >> 16) & 0xFF,
+ (rule->key.key.ipv4_5tuple.dst_ip >> 8) & 0xFF,
+ rule->key.key.ipv4_5tuple.dst_ip & 0xFF,
+ rule->key.key.ipv4_5tuple.dst_ip_mask,
+
+ rule->key.key.ipv4_5tuple.src_port_from,
+ rule->key.key.ipv4_5tuple.src_port_to,
+
+ rule->key.key.ipv4_5tuple.dst_port_from,
+ rule->key.key.ipv4_5tuple.dst_port_to,
+
+ rule->key.key.ipv4_5tuple.proto,
+ rule->key.key.ipv4_5tuple.proto_mask,
+
+ rule->port_id,
+ rule->entry_ptr);
+}
+
+static struct app_pipeline_firewall_rule *
+app_pipeline_firewall_rule_find(struct app_pipeline_firewall *p,
+ struct pipeline_firewall_key *key)
+{
+ struct app_pipeline_firewall_rule *r;
+
+ TAILQ_FOREACH(r, &p->rules, node)
+ if (memcmp(key,
+ &r->key,
+ sizeof(struct pipeline_firewall_key)) == 0)
+ return r;
+
+ return NULL;
+}
+
+static int
+app_pipeline_firewall_ls(
+ struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_firewall *p;
+ struct app_pipeline_firewall_rule *rule;
+ uint32_t n_rules;
+ int priority;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
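+	/* Print rules in increasing priority order: scan the rule list once
+	 * per priority value, starting from 0, until all rules are printed */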
+ n_rules = p->n_rules;
+ for (priority = 0; n_rules; priority++)
+ TAILQ_FOREACH(rule, &p->rules, node)
+ if (rule->priority == priority) {
+ print_firewall_ipv4_rule(rule);
+ n_rules--;
+ }
+
+ if (p->default_rule_present)
+ printf("Default rule: port %" PRIu32 " (entry ptr = %p)\n",
+ p->default_rule_port_id,
+ p->default_rule_entry_ptr);
+ else
+ printf("Default rule: DROP\n");
+
+ printf("\n");
+
+ return 0;
+}
+
+static void *
+app_pipeline_firewall_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct app_pipeline_firewall *p;
+ uint32_t size;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_firewall));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+
+ /* Initialization */
+ p->n_ports_in = params->n_ports_in;
+ p->n_ports_out = params->n_ports_out;
+
+ TAILQ_INIT(&p->rules);
+ p->n_rules = 0;
+ p->default_rule_present = 0;
+ p->default_rule_port_id = 0;
+ p->default_rule_entry_ptr = NULL;
+
+ return (void *) p;
+}
+
+static int
+app_pipeline_firewall_free(void *pipeline)
+{
+ struct app_pipeline_firewall *p = pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ while (!TAILQ_EMPTY(&p->rules)) {
+ struct app_pipeline_firewall_rule *rule;
+
+ rule = TAILQ_FIRST(&p->rules);
+ TAILQ_REMOVE(&p->rules, rule, node);
+ rte_free(rule);
+ }
+
+ rte_free(p);
+ return 0;
+}
+
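+/*
+ * Validate an IPv4 5-tuple key (prefix depths of at most 32 bits and
+ * non-empty port ranges) and normalize it by masking out the host bits
+ * of the source and destination addresses.
+ */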
+static int
+app_pipeline_firewall_key_check_and_normalize(struct pipeline_firewall_key *key)
+{
+ switch (key->type) {
+ case PIPELINE_FIREWALL_IPV4_5TUPLE:
+ {
+ uint32_t src_ip_depth = key->key.ipv4_5tuple.src_ip_mask;
+ uint32_t dst_ip_depth = key->key.ipv4_5tuple.dst_ip_mask;
+ uint16_t src_port_from = key->key.ipv4_5tuple.src_port_from;
+ uint16_t src_port_to = key->key.ipv4_5tuple.src_port_to;
+ uint16_t dst_port_from = key->key.ipv4_5tuple.dst_port_from;
+ uint16_t dst_port_to = key->key.ipv4_5tuple.dst_port_to;
+
+ uint32_t src_ip_netmask = 0;
+ uint32_t dst_ip_netmask = 0;
+
+ if ((src_ip_depth > 32) ||
+ (dst_ip_depth > 32) ||
+ (src_port_from > src_port_to) ||
+ (dst_port_from > dst_port_to))
+ return -1;
+
+ if (src_ip_depth)
+ src_ip_netmask = (~0U) << (32 - src_ip_depth);
+
+ if (dst_ip_depth)
+ dst_ip_netmask = ((~0U) << (32 - dst_ip_depth));
+
+ key->key.ipv4_5tuple.src_ip &= src_ip_netmask;
+ key->key.ipv4_5tuple.dst_ip &= dst_ip_netmask;
+
+ return 0;
+ }
+
+ default:
+ return -1;
+ }
+}
+
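+/*
+ * Parse a firewall bulk add file. Each line describes one rule:
+ * <priority> <src IP> <src depth> <dst IP> <dst depth>
+ *   <src port from> <src port to> <dst port from> <dst port to>
+ *   <proto> <proto mask (hex, "0x" prefixed)> <output port>
+ */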
+static int
+app_pipeline_add_bulk_parse_file(char *filename,
+ struct app_pipeline_add_bulk_params *params)
+{
+ FILE *f;
+ char file_buf[BUF_SIZE];
+ uint32_t i;
+ int status = 0;
+
+ f = fopen(filename, "r");
+ if (f == NULL)
+ return -1;
+
+ params->n_keys = 0;
+ while (fgets(file_buf, BUF_SIZE, f) != NULL)
+ params->n_keys++;
+ rewind(f);
+
+ if (params->n_keys == 0) {
+ status = -1;
+ goto end;
+ }
+
+ params->keys = rte_malloc(NULL,
+ params->n_keys * sizeof(struct pipeline_firewall_key),
+ RTE_CACHE_LINE_SIZE);
+ if (params->keys == NULL) {
+ status = -1;
+ goto end;
+ }
+
+ params->priorities = rte_malloc(NULL,
+ params->n_keys * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (params->priorities == NULL) {
+ status = -1;
+ goto end;
+ }
+
+ params->port_ids = rte_malloc(NULL,
+ params->n_keys * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (params->port_ids == NULL) {
+ status = -1;
+ goto end;
+ }
+
+ i = 0;
+ while (fgets(file_buf, BUF_SIZE, f) != NULL) {
+ char *str;
+
+ str = strtok(file_buf, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->priorities[i] = atoi(str);
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip = atoi(str)<<24;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<16;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<8;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip_mask = atoi(str);
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip = atoi(str)<<24;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<16;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<8;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip_mask = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_port_from = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_port_to = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_port_from = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_port_to = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.proto = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+		/* Skip the leading "0x" before parsing the hex protocol mask */
+ params->keys[i].key.ipv4_5tuple.proto_mask = strtol(str+2, NULL, 16);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->port_ids[i] = atoi(str);
+ params->keys[i].type = PIPELINE_FIREWALL_IPV4_5TUPLE;
+
+ i++;
+ }
+
+end:
+ fclose(f);
+ return status;
+}
+
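+/*
+ * Parse a firewall bulk delete file. Lines follow the bulk add format,
+ * but without the leading priority and the trailing output port fields;
+ * every parsed key is then validated and normalized.
+ */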
+static int
+app_pipeline_del_bulk_parse_file(char *filename,
+ struct app_pipeline_del_bulk_params *params)
+{
+ FILE *f;
+ char file_buf[BUF_SIZE];
+ uint32_t i;
+ int status = 0;
+
+ f = fopen(filename, "r");
+ if (f == NULL)
+ return -1;
+
+ params->n_keys = 0;
+ while (fgets(file_buf, BUF_SIZE, f) != NULL)
+ params->n_keys++;
+ rewind(f);
+
+ if (params->n_keys == 0) {
+ status = -1;
+ goto end;
+ }
+
+ params->keys = rte_malloc(NULL,
+ params->n_keys * sizeof(struct pipeline_firewall_key),
+ RTE_CACHE_LINE_SIZE);
+ if (params->keys == NULL) {
+ status = -1;
+ goto end;
+ }
+
+ i = 0;
+ while (fgets(file_buf, BUF_SIZE, f) != NULL) {
+ char *str;
+
+ str = strtok(file_buf, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip = atoi(str)<<24;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<16;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str)<<8;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip |= atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_ip_mask = atoi(str);
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip = atoi(str)<<24;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<16;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str)<<8;
+
+ str = strtok(NULL, " .");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip |= atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_ip_mask = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_port_from = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.src_port_to = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_port_from = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.dst_port_to = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+ params->keys[i].key.ipv4_5tuple.proto = atoi(str);
+
+ str = strtok(NULL, " ");
+ if (str == NULL) {
+ status = -1;
+ goto end;
+ }
+		/* Skip the leading "0x" before parsing the hex protocol mask */
+ params->keys[i].key.ipv4_5tuple.proto_mask = strtol(str+2, NULL, 16);
+
+ params->keys[i].type = PIPELINE_FIREWALL_IPV4_5TUPLE;
+
+ i++;
+ }
+
+ for (i = 0; i < params->n_keys; i++) {
+ if (app_pipeline_firewall_key_check_and_normalize(&params->keys[i]) != 0) {
+ status = -1;
+ goto end;
+ }
+ }
+
+end:
+ fclose(f);
+ return status;
+}
+
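+/*
+ * Front-end rule add: find or allocate the local shadow rule, send an
+ * ADD request to the back-end pipeline and, on success, store the
+ * returned table entry pointer and commit the rule to the local list.
+ */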
+int
+app_pipeline_firewall_add_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *key,
+ uint32_t priority,
+ uint32_t port_id)
+{
+ struct app_pipeline_firewall *p;
+ struct app_pipeline_firewall_rule *rule;
+ struct pipeline_firewall_add_msg_req *req;
+ struct pipeline_firewall_add_msg_rsp *rsp;
+ int new_rule;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL) ||
+ (key->type != PIPELINE_FIREWALL_IPV4_5TUPLE))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ if (app_pipeline_firewall_key_check_and_normalize(key) != 0)
+ return -1;
+
+ /* Find existing rule or allocate new rule */
+ rule = app_pipeline_firewall_rule_find(p, key);
+ new_rule = (rule == NULL);
+ if (rule == NULL) {
+ rule = rte_malloc(NULL, sizeof(*rule), RTE_CACHE_LINE_SIZE);
+
+ if (rule == NULL)
+ return -1;
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ if (new_rule)
+ rte_free(rule);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD;
+ memcpy(&req->key, key, sizeof(*key));
+ req->priority = priority;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ if (new_rule)
+ rte_free(rule);
+ return -1;
+ }
+
+ /* Read response and write rule */
+ if (rsp->status ||
+ (rsp->entry_ptr == NULL) ||
+ ((new_rule == 0) && (rsp->key_found == 0)) ||
+ ((new_rule == 1) && (rsp->key_found == 1))) {
+ app_msg_free(app, rsp);
+ if (new_rule)
+ rte_free(rule);
+ return -1;
+ }
+
+ memcpy(&rule->key, key, sizeof(*key));
+ rule->priority = priority;
+ rule->port_id = port_id;
+ rule->entry_ptr = rsp->entry_ptr;
+
+ /* Commit rule */
+ if (new_rule) {
+ TAILQ_INSERT_TAIL(&p->rules, rule, node);
+ p->n_rules++;
+ }
+
+ print_firewall_ipv4_rule(rule);
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_firewall_delete_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *key)
+{
+ struct app_pipeline_firewall *p;
+ struct app_pipeline_firewall_rule *rule;
+ struct pipeline_firewall_del_msg_req *req;
+ struct pipeline_firewall_del_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL) ||
+ (key->type != PIPELINE_FIREWALL_IPV4_5TUPLE))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ if (app_pipeline_firewall_key_check_and_normalize(key) != 0)
+ return -1;
+
+ /* Find rule */
+ rule = app_pipeline_firewall_rule_find(p, key);
+ if (rule == NULL)
+ return 0;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL;
+ memcpy(&req->key, key, sizeof(*key));
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status || !rsp->key_found) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Remove rule */
+ TAILQ_REMOVE(&p->rules, rule, node);
+ p->n_rules--;
+ rte_free(rule);
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_firewall_add_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *keys,
+ uint32_t n_keys,
+ uint32_t *priorities,
+ uint32_t *port_ids)
+{
+ struct app_pipeline_firewall *p;
+ struct pipeline_firewall_add_bulk_msg_req *req;
+ struct pipeline_firewall_add_bulk_msg_rsp *rsp;
+
+ struct app_pipeline_firewall_rule **rules;
+ int *new_rules;
+
+ int *keys_found;
+ void **entries_ptr;
+
+ uint32_t i;
+ int status = 0;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ rules = rte_malloc(NULL,
+ n_keys * sizeof(struct app_pipeline_firewall_rule *),
+ RTE_CACHE_LINE_SIZE);
+ if (rules == NULL)
+ return -1;
+
+ new_rules = rte_malloc(NULL,
+ n_keys * sizeof(int),
+ RTE_CACHE_LINE_SIZE);
+ if (new_rules == NULL) {
+ rte_free(rules);
+ return -1;
+ }
+
+ /* check data integrity and add to rule list */
+ for (i = 0; i < n_keys; i++) {
+ if (port_ids[i] >= p->n_ports_out) {
+ rte_free(rules);
+ rte_free(new_rules);
+ return -1;
+ }
+
+ if (app_pipeline_firewall_key_check_and_normalize(&keys[i]) != 0) {
+ rte_free(rules);
+ rte_free(new_rules);
+ return -1;
+ }
+
+ rules[i] = app_pipeline_firewall_rule_find(p, &keys[i]);
+ new_rules[i] = (rules[i] == NULL);
+ if (rules[i] == NULL) {
+ rules[i] = rte_malloc(NULL, sizeof(*rules[i]),
+ RTE_CACHE_LINE_SIZE);
+
+ if (rules[i] == NULL) {
+ uint32_t j;
+
+ for (j = 0; j <= i; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ return -1;
+ }
+ }
+ }
+
+ keys_found = rte_malloc(NULL,
+ n_keys * sizeof(int),
+ RTE_CACHE_LINE_SIZE);
+ if (keys_found == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < n_keys; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ return -1;
+ }
+
+ entries_ptr = rte_malloc(NULL,
+ n_keys * sizeof(struct rte_pipeline_table_entry *),
+ RTE_CACHE_LINE_SIZE);
+ if (entries_ptr == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < n_keys; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ rte_free(keys_found);
+ return -1;
+ }
+ for (i = 0; i < n_keys; i++) {
+ entries_ptr[i] = rte_malloc(NULL,
+ sizeof(struct rte_pipeline_table_entry),
+ RTE_CACHE_LINE_SIZE);
+
+ if (entries_ptr[i] == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < n_keys; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ for (j = 0; j <= i; j++)
+ rte_free(entries_ptr[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ rte_free(keys_found);
+ rte_free(entries_ptr);
+ return -1;
+ }
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < n_keys; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ for (j = 0; j < n_keys; j++)
+ rte_free(entries_ptr[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ rte_free(keys_found);
+ rte_free(entries_ptr);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD_BULK;
+
+ req->keys = keys;
+ req->n_keys = n_keys;
+ req->port_ids = port_ids;
+ req->priorities = priorities;
+ req->keys_found = keys_found;
+ req->entries_ptr = entries_ptr;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < n_keys; j++)
+ if (new_rules[j])
+ rte_free(rules[j]);
+
+ for (j = 0; j < n_keys; j++)
+ rte_free(entries_ptr[j]);
+
+ rte_free(rules);
+ rte_free(new_rules);
+ rte_free(keys_found);
+ rte_free(entries_ptr);
+ return -1;
+ }
+
+ if (rsp->status) {
+ for (i = 0; i < n_keys; i++)
+ if (new_rules[i])
+ rte_free(rules[i]);
+
+ for (i = 0; i < n_keys; i++)
+ rte_free(entries_ptr[i]);
+
+ status = -1;
+ goto cleanup;
+ }
+
+ for (i = 0; i < n_keys; i++) {
+ if (entries_ptr[i] == NULL ||
+ ((new_rules[i] == 0) && (keys_found[i] == 0)) ||
+ ((new_rules[i] == 1) && (keys_found[i] == 1))) {
+ for (i = 0; i < n_keys; i++)
+ if (new_rules[i])
+ rte_free(rules[i]);
+
+ for (i = 0; i < n_keys; i++)
+ rte_free(entries_ptr[i]);
+
+ status = -1;
+ goto cleanup;
+ }
+ }
+
+ for (i = 0; i < n_keys; i++) {
+ memcpy(&rules[i]->key, &keys[i], sizeof(keys[i]));
+ rules[i]->priority = priorities[i];
+ rules[i]->port_id = port_ids[i];
+ rules[i]->entry_ptr = entries_ptr[i];
+
+ /* Commit rule */
+ if (new_rules[i]) {
+ TAILQ_INSERT_TAIL(&p->rules, rules[i], node);
+ p->n_rules++;
+ }
+
+ print_firewall_ipv4_rule(rules[i]);
+ }
+
+cleanup:
+ app_msg_free(app, rsp);
+ rte_free(rules);
+ rte_free(new_rules);
+ rte_free(keys_found);
+ rte_free(entries_ptr);
+
+ return status;
+}
+
+int
+app_pipeline_firewall_delete_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *keys,
+ uint32_t n_keys)
+{
+ struct app_pipeline_firewall *p;
+ struct pipeline_firewall_del_bulk_msg_req *req;
+ struct pipeline_firewall_del_bulk_msg_rsp *rsp;
+
+ struct app_pipeline_firewall_rule **rules;
+ int *keys_found;
+
+ uint32_t i;
+ int status = 0;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ rules = rte_malloc(NULL,
+ n_keys * sizeof(struct app_pipeline_firewall_rule *),
+ RTE_CACHE_LINE_SIZE);
+ if (rules == NULL)
+ return -1;
+
+ for (i = 0; i < n_keys; i++) {
+		if (app_pipeline_firewall_key_check_and_normalize(&keys[i]) != 0) {
+			rte_free(rules);
+			return -1;
+		}
+
+ rules[i] = app_pipeline_firewall_rule_find(p, &keys[i]);
+ }
+
+ keys_found = rte_malloc(NULL,
+ n_keys * sizeof(int),
+ RTE_CACHE_LINE_SIZE);
+ if (keys_found == NULL) {
+ rte_free(rules);
+ return -1;
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ rte_free(rules);
+ rte_free(keys_found);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL_BULK;
+
+ req->keys = keys;
+ req->n_keys = n_keys;
+ req->keys_found = keys_found;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ rte_free(rules);
+ rte_free(keys_found);
+ return -1;
+ }
+
+ if (rsp->status) {
+ status = -1;
+ goto cleanup;
+ }
+
+ for (i = 0; i < n_keys; i++) {
+ if (keys_found[i] == 0) {
+ status = -1;
+ goto cleanup;
+ }
+ }
+
+ for (i = 0; i < n_keys; i++) {
+ TAILQ_REMOVE(&p->rules, rules[i], node);
+ p->n_rules--;
+ rte_free(rules[i]);
+ }
+
+cleanup:
+ app_msg_free(app, rsp);
+ rte_free(rules);
+ rte_free(keys_found);
+
+ return status;
+}
+
+int
+app_pipeline_firewall_add_default_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct app_pipeline_firewall *p;
+ struct pipeline_firewall_add_default_msg_req *req;
+ struct pipeline_firewall_add_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write rule */
+ if (rsp->status || (rsp->entry_ptr == NULL)) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ p->default_rule_port_id = port_id;
+ p->default_rule_entry_ptr = rsp->entry_ptr;
+
+ /* Commit rule */
+ p->default_rule_present = 1;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_firewall_delete_default_rule(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_firewall *p;
+ struct pipeline_firewall_del_default_msg_req *req;
+ struct pipeline_firewall_del_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_firewall);
+ if (p == NULL)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write rule */
+ if (rsp->status) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Commit rule */
+ p->default_rule_present = 0;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+/*
+ * p firewall add ipv4
+ */
+
+struct cmd_firewall_add_ipv4_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t ipv4_string;
+ int32_t priority;
+ cmdline_ipaddr_t src_ip;
+ uint32_t src_ip_mask;
+ cmdline_ipaddr_t dst_ip;
+ uint32_t dst_ip_mask;
+ uint16_t src_port_from;
+ uint16_t src_port_to;
+ uint16_t dst_port_from;
+ uint16_t dst_port_to;
+ uint8_t proto;
+ uint8_t proto_mask;
+ uint8_t port_id;
+};
+
+static void
+cmd_firewall_add_ipv4_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_add_ipv4_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_firewall_key key;
+ int status;
+
+ key.type = PIPELINE_FIREWALL_IPV4_5TUPLE;
+ key.key.ipv4_5tuple.src_ip = rte_bswap32(
+ (uint32_t) params->src_ip.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.src_ip_mask = params->src_ip_mask;
+ key.key.ipv4_5tuple.dst_ip = rte_bswap32(
+ (uint32_t) params->dst_ip.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.dst_ip_mask = params->dst_ip_mask;
+ key.key.ipv4_5tuple.src_port_from = params->src_port_from;
+ key.key.ipv4_5tuple.src_port_to = params->src_port_to;
+ key.key.ipv4_5tuple.dst_port_from = params->dst_port_from;
+ key.key.ipv4_5tuple.dst_port_to = params->dst_port_to;
+ key.key.ipv4_5tuple.proto = params->proto;
+ key.key.ipv4_5tuple.proto_mask = params->proto_mask;
+
+ status = app_pipeline_firewall_add_rule(app,
+ params->pipeline_id,
+ &key,
+ params->priority,
+ params->port_id);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+cmdline_parse_token_string_t cmd_firewall_add_ipv4_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_add_ipv4_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_add_ipv4_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_firewall_add_ipv4_ipv4_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ ipv4_string, "ipv4");
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_priority =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, priority,
+ INT32);
+
+cmdline_parse_token_ipaddr_t cmd_firewall_add_ipv4_src_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_firewall_add_ipv4_result, src_ip);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_ip_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, src_ip_mask,
+ UINT32);
+
+cmdline_parse_token_ipaddr_t cmd_firewall_add_ipv4_dst_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_firewall_add_ipv4_result, dst_ip);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_ip_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result, dst_ip_mask,
+ UINT32);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_port_from =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ src_port_from, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_src_port_to =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ src_port_to, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_port_from =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ dst_port_from, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_dst_port_to =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ dst_port_to, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ proto, UINT8);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_proto_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ proto_mask, UINT8);
+
+cmdline_parse_token_num_t cmd_firewall_add_ipv4_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_ipv4_result,
+ port_id, UINT8);
+
+cmdline_parse_inst_t cmd_firewall_add_ipv4 = {
+ .f = cmd_firewall_add_ipv4_parsed,
+ .data = NULL,
+ .help_str = "Firewall rule add",
+ .tokens = {
+ (void *) &cmd_firewall_add_ipv4_p_string,
+ (void *) &cmd_firewall_add_ipv4_pipeline_id,
+ (void *) &cmd_firewall_add_ipv4_firewall_string,
+ (void *) &cmd_firewall_add_ipv4_add_string,
+ (void *) &cmd_firewall_add_ipv4_ipv4_string,
+ (void *) &cmd_firewall_add_ipv4_priority,
+ (void *) &cmd_firewall_add_ipv4_src_ip,
+ (void *) &cmd_firewall_add_ipv4_src_ip_mask,
+ (void *) &cmd_firewall_add_ipv4_dst_ip,
+ (void *) &cmd_firewall_add_ipv4_dst_ip_mask,
+ (void *) &cmd_firewall_add_ipv4_src_port_from,
+ (void *) &cmd_firewall_add_ipv4_src_port_to,
+ (void *) &cmd_firewall_add_ipv4_dst_port_from,
+ (void *) &cmd_firewall_add_ipv4_dst_port_to,
+ (void *) &cmd_firewall_add_ipv4_proto,
+ (void *) &cmd_firewall_add_ipv4_proto_mask,
+ (void *) &cmd_firewall_add_ipv4_port_id,
+ NULL,
+ },
+};
+
+/*
+ * p firewall del ipv4
+ */
+
+struct cmd_firewall_del_ipv4_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t ipv4_string;
+ cmdline_ipaddr_t src_ip;
+ uint32_t src_ip_mask;
+ cmdline_ipaddr_t dst_ip;
+ uint32_t dst_ip_mask;
+ uint16_t src_port_from;
+ uint16_t src_port_to;
+ uint16_t dst_port_from;
+ uint16_t dst_port_to;
+ uint8_t proto;
+ uint8_t proto_mask;
+};
+
+static void
+cmd_firewall_del_ipv4_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_del_ipv4_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_firewall_key key;
+ int status;
+
+ key.type = PIPELINE_FIREWALL_IPV4_5TUPLE;
+ key.key.ipv4_5tuple.src_ip = rte_bswap32(
+ (uint32_t) params->src_ip.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.src_ip_mask = params->src_ip_mask;
+ key.key.ipv4_5tuple.dst_ip = rte_bswap32(
+ (uint32_t) params->dst_ip.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.dst_ip_mask = params->dst_ip_mask;
+ key.key.ipv4_5tuple.src_port_from = params->src_port_from;
+ key.key.ipv4_5tuple.src_port_to = params->src_port_to;
+ key.key.ipv4_5tuple.dst_port_from = params->dst_port_from;
+ key.key.ipv4_5tuple.dst_port_to = params->dst_port_to;
+ key.key.ipv4_5tuple.proto = params->proto;
+ key.key.ipv4_5tuple.proto_mask = params->proto_mask;
+
+ status = app_pipeline_firewall_delete_rule(app,
+ params->pipeline_id,
+ &key);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+cmdline_parse_token_string_t cmd_firewall_del_ipv4_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_del_ipv4_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_del_ipv4_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ del_string, "del");
+
+cmdline_parse_token_string_t cmd_firewall_del_ipv4_ipv4_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ ipv4_string, "ipv4");
+
+cmdline_parse_token_ipaddr_t cmd_firewall_del_ipv4_src_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_ip);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_ip_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_ip_mask,
+ UINT32);
+
+cmdline_parse_token_ipaddr_t cmd_firewall_del_ipv4_dst_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_firewall_del_ipv4_result, dst_ip);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_ip_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, dst_ip_mask,
+ UINT32);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_port_from =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ src_port_from, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_src_port_to =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, src_port_to,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_port_from =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ dst_port_from, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_dst_port_to =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ dst_port_to, UINT16);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result,
+ proto, UINT8);
+
+cmdline_parse_token_num_t cmd_firewall_del_ipv4_proto_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_ipv4_result, proto_mask,
+ UINT8);
+
+cmdline_parse_inst_t cmd_firewall_del_ipv4 = {
+ .f = cmd_firewall_del_ipv4_parsed,
+ .data = NULL,
+ .help_str = "Firewall rule delete",
+ .tokens = {
+ (void *) &cmd_firewall_del_ipv4_p_string,
+ (void *) &cmd_firewall_del_ipv4_pipeline_id,
+ (void *) &cmd_firewall_del_ipv4_firewall_string,
+ (void *) &cmd_firewall_del_ipv4_del_string,
+ (void *) &cmd_firewall_del_ipv4_ipv4_string,
+ (void *) &cmd_firewall_del_ipv4_src_ip,
+ (void *) &cmd_firewall_del_ipv4_src_ip_mask,
+ (void *) &cmd_firewall_del_ipv4_dst_ip,
+ (void *) &cmd_firewall_del_ipv4_dst_ip_mask,
+ (void *) &cmd_firewall_del_ipv4_src_port_from,
+ (void *) &cmd_firewall_del_ipv4_src_port_to,
+ (void *) &cmd_firewall_del_ipv4_dst_port_from,
+ (void *) &cmd_firewall_del_ipv4_dst_port_to,
+ (void *) &cmd_firewall_del_ipv4_proto,
+ (void *) &cmd_firewall_del_ipv4_proto_mask,
+ NULL,
+ },
+};
+
+/*
+ * p firewall add bulk
+ */
+
+struct cmd_firewall_add_bulk_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t bulk_string;
+ cmdline_fixed_string_t file_path;
+};
+
+static void
+cmd_firewall_add_bulk_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_add_bulk_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+	struct app_pipeline_add_bulk_params add_bulk_params;
+
+	/* Zero the params so that rte_free() on the error path is safe even
+	 * when parsing fails before the arrays are allocated */
+	memset(&add_bulk_params, 0, sizeof(add_bulk_params));
+
+ status = app_pipeline_add_bulk_parse_file(params->file_path, &add_bulk_params);
+ if (status != 0) {
+ printf("Command failed\n");
+ goto end;
+ }
+
+ status = app_pipeline_firewall_add_bulk(app, params->pipeline_id, add_bulk_params.keys,
+ add_bulk_params.n_keys, add_bulk_params.priorities, add_bulk_params.port_ids);
+ if (status != 0) {
+ printf("Command failed\n");
+ goto end;
+ }
+
+end:
+ rte_free(add_bulk_params.keys);
+ rte_free(add_bulk_params.priorities);
+ rte_free(add_bulk_params.port_ids);
+}
+
+cmdline_parse_token_string_t cmd_firewall_add_bulk_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_firewall_add_bulk_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_bulk_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_add_bulk_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_add_bulk_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_firewall_add_bulk_bulk_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result,
+ bulk_string, "bulk");
+
+cmdline_parse_token_string_t cmd_firewall_add_bulk_file_path_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_bulk_result,
+ file_path, NULL);
+
+cmdline_parse_inst_t cmd_firewall_add_bulk = {
+ .f = cmd_firewall_add_bulk_parsed,
+ .data = NULL,
+ .help_str = "Firewall rule add bulk",
+ .tokens = {
+ (void *) &cmd_firewall_add_bulk_p_string,
+ (void *) &cmd_firewall_add_bulk_pipeline_id,
+ (void *) &cmd_firewall_add_bulk_firewall_string,
+ (void *) &cmd_firewall_add_bulk_add_string,
+ (void *) &cmd_firewall_add_bulk_bulk_string,
+ (void *) &cmd_firewall_add_bulk_file_path_string,
+ NULL,
+ },
+};
+
+/*
+ * p firewall del bulk
+ */
+
+struct cmd_firewall_del_bulk_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t bulk_string;
+ cmdline_fixed_string_t file_path;
+};
+
+static void
+cmd_firewall_del_bulk_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_del_bulk_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+	struct app_pipeline_del_bulk_params del_bulk_params;
+
+	/* Zero the params so that rte_free() on the error path is safe even
+	 * when parsing fails before the keys array is allocated */
+	memset(&del_bulk_params, 0, sizeof(del_bulk_params));
+
+ status = app_pipeline_del_bulk_parse_file(params->file_path, &del_bulk_params);
+ if (status != 0) {
+ printf("Command failed\n");
+ goto end;
+ }
+
+ status = app_pipeline_firewall_delete_bulk(app, params->pipeline_id,
+ del_bulk_params.keys, del_bulk_params.n_keys);
+ if (status != 0) {
+ printf("Command failed\n");
+ goto end;
+ }
+
+end:
+ rte_free(del_bulk_params.keys);
+}
+
+cmdline_parse_token_string_t cmd_firewall_del_bulk_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_firewall_del_bulk_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_bulk_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_del_bulk_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_del_bulk_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result,
+ del_string, "del");
+
+cmdline_parse_token_string_t cmd_firewall_del_bulk_bulk_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result,
+ bulk_string, "bulk");
+
+cmdline_parse_token_string_t cmd_firewall_del_bulk_file_path_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_bulk_result,
+ file_path, NULL);
+
+cmdline_parse_inst_t cmd_firewall_del_bulk = {
+ .f = cmd_firewall_del_bulk_parsed,
+ .data = NULL,
+ .help_str = "Firewall rule del bulk",
+ .tokens = {
+ (void *) &cmd_firewall_del_bulk_p_string,
+ (void *) &cmd_firewall_del_bulk_pipeline_id,
+ (void *) &cmd_firewall_del_bulk_firewall_string,
+		(void *) &cmd_firewall_del_bulk_del_string,
+ (void *) &cmd_firewall_del_bulk_bulk_string,
+ (void *) &cmd_firewall_del_bulk_file_path_string,
+ NULL,
+ },
+};
+
+/*
+ * p firewall add default
+ */
+struct cmd_firewall_add_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t default_string;
+ uint8_t port_id;
+};
+
+static void
+cmd_firewall_add_default_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_add_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_firewall_add_default_rule(app,
+ params->pipeline_id,
+ params->port_id);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+cmdline_parse_token_string_t cmd_firewall_add_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_firewall_add_default_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_default_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_add_default_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_add_default_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_firewall_add_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_add_default_result,
+ default_string, "default");
+
+cmdline_parse_token_num_t cmd_firewall_add_default_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_add_default_result, port_id,
+ UINT8);
+
+cmdline_parse_inst_t cmd_firewall_add_default = {
+ .f = cmd_firewall_add_default_parsed,
+ .data = NULL,
+ .help_str = "Firewall default rule add",
+ .tokens = {
+ (void *) &cmd_firewall_add_default_p_string,
+ (void *) &cmd_firewall_add_default_pipeline_id,
+ (void *) &cmd_firewall_add_default_firewall_string,
+ (void *) &cmd_firewall_add_default_add_string,
+ (void *) &cmd_firewall_add_default_default_string,
+ (void *) &cmd_firewall_add_default_port_id,
+ NULL,
+ },
+};
+
+/*
+ * p firewall del default
+ */
+struct cmd_firewall_del_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t default_string;
+};
+
+static void
+cmd_firewall_del_default_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_del_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_firewall_delete_default_rule(app,
+ params->pipeline_id);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+cmdline_parse_token_string_t cmd_firewall_del_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_firewall_del_default_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_del_default_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_del_default_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_del_default_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result,
+ del_string, "del");
+
+cmdline_parse_token_string_t cmd_firewall_del_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_del_default_result,
+ default_string, "default");
+
+cmdline_parse_inst_t cmd_firewall_del_default = {
+ .f = cmd_firewall_del_default_parsed,
+ .data = NULL,
+ .help_str = "Firewall default rule delete",
+ .tokens = {
+ (void *) &cmd_firewall_del_default_p_string,
+ (void *) &cmd_firewall_del_default_pipeline_id,
+ (void *) &cmd_firewall_del_default_firewall_string,
+ (void *) &cmd_firewall_del_default_del_string,
+ (void *) &cmd_firewall_del_default_default_string,
+ NULL,
+ },
+};
+
+/*
+ * p firewall ls
+ */
+
+struct cmd_firewall_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t firewall_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_firewall_ls_parsed(
+ void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ void *data)
+{
+ struct cmd_firewall_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_firewall_ls(app, params->pipeline_id);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+cmdline_parse_token_string_t cmd_firewall_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_firewall_ls_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_firewall_ls_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_firewall_ls_firewall_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result,
+ firewall_string, "firewall");
+
+cmdline_parse_token_string_t cmd_firewall_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_firewall_ls_result, ls_string,
+ "ls");
+
+cmdline_parse_inst_t cmd_firewall_ls = {
+ .f = cmd_firewall_ls_parsed,
+ .data = NULL,
+ .help_str = "Firewall rule list",
+ .tokens = {
+ (void *) &cmd_firewall_ls_p_string,
+ (void *) &cmd_firewall_ls_pipeline_id,
+ (void *) &cmd_firewall_ls_firewall_string,
+ (void *) &cmd_firewall_ls_ls_string,
+ NULL,
+ },
+};
+
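+/*
+ * CLI commands exported by the firewall pipeline front-end; the
+ * NULL-terminated list is registered through the front-end ops below.
+ */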
+static cmdline_parse_ctx_t pipeline_cmds[] = {
+ (cmdline_parse_inst_t *) &cmd_firewall_add_ipv4,
+ (cmdline_parse_inst_t *) &cmd_firewall_del_ipv4,
+ (cmdline_parse_inst_t *) &cmd_firewall_add_bulk,
+ (cmdline_parse_inst_t *) &cmd_firewall_del_bulk,
+ (cmdline_parse_inst_t *) &cmd_firewall_add_default,
+ (cmdline_parse_inst_t *) &cmd_firewall_del_default,
+ (cmdline_parse_inst_t *) &cmd_firewall_ls,
+ NULL,
+};
+
+static struct pipeline_fe_ops pipeline_firewall_fe_ops = {
+ .f_init = app_pipeline_firewall_init,
+ .f_free = app_pipeline_firewall_free,
+ .cmds = pipeline_cmds,
+};
+
+struct pipeline_type pipeline_firewall = {
+ .name = "FIREWALL",
+ .be_ops = &pipeline_firewall_be_ops,
+ .fe_ops = &pipeline_firewall_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall.h b/examples/ip_pipeline/pipeline/pipeline_firewall.h
new file mode 100644
index 00000000..ccc4e64b
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_firewall.h
@@ -0,0 +1,77 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FIREWALL_H__
+#define __INCLUDE_PIPELINE_FIREWALL_H__
+
+#include "pipeline.h"
+#include "pipeline_firewall_be.h"
+
+int
+app_pipeline_firewall_add_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *key,
+ uint32_t priority,
+ uint32_t port_id);
+
+int
+app_pipeline_firewall_delete_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *key);
+
+int
+app_pipeline_firewall_add_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *keys,
+ uint32_t n_keys,
+ uint32_t *priorities,
+ uint32_t *port_ids);
+
+int
+app_pipeline_firewall_delete_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_firewall_key *keys,
+ uint32_t n_keys);
+
+int
+app_pipeline_firewall_add_default_rule(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_pipeline_firewall_delete_default_rule(struct app_params *app,
+ uint32_t pipeline_id);
+
+extern struct pipeline_type pipeline_firewall;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall_be.c b/examples/ip_pipeline/pipeline/pipeline_firewall_be.c
new file mode 100644
index 00000000..e7a8a4c5
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_firewall_be.c
@@ -0,0 +1,907 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ether.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_byteorder.h>
+#include <rte_table_acl.h>
+
+#include "pipeline_firewall_be.h"
+#include "parser.h"
+
+struct pipeline_firewall {
+ struct pipeline p;
+ pipeline_msg_req_handler custom_handlers[PIPELINE_FIREWALL_MSG_REQS];
+
+ uint32_t n_rules;
+ uint32_t n_rule_fields;
+ struct rte_acl_field_def *field_format;
+ uint32_t field_format_size;
+} __rte_cache_aligned;
+
+static void *
+pipeline_firewall_msg_req_custom_handler(struct pipeline *p, void *msg);
+
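+/*
+ * Back-end message dispatch table: common pipeline requests use the
+ * shared handlers, while PIPELINE_MSG_REQ_CUSTOM is routed to the
+ * firewall-specific handler.
+ */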
+static pipeline_msg_req_handler handlers[] = {
+ [PIPELINE_MSG_REQ_PING] =
+ pipeline_msg_req_ping_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_IN] =
+ pipeline_msg_req_stats_port_in_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_OUT] =
+ pipeline_msg_req_stats_port_out_handler,
+ [PIPELINE_MSG_REQ_STATS_TABLE] =
+ pipeline_msg_req_stats_table_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_ENABLE] =
+ pipeline_msg_req_port_in_enable_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_DISABLE] =
+ pipeline_msg_req_port_in_disable_handler,
+ [PIPELINE_MSG_REQ_CUSTOM] =
+ pipeline_firewall_msg_req_custom_handler,
+};
+
+static void *
+pipeline_firewall_msg_req_add_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_firewall_msg_req_del_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_firewall_msg_req_add_bulk_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_firewall_msg_req_del_bulk_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_firewall_msg_req_add_default_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_firewall_msg_req_del_default_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler custom_handlers[] = {
+ [PIPELINE_FIREWALL_MSG_REQ_ADD] =
+ pipeline_firewall_msg_req_add_handler,
+ [PIPELINE_FIREWALL_MSG_REQ_DEL] =
+ pipeline_firewall_msg_req_del_handler,
+ [PIPELINE_FIREWALL_MSG_REQ_ADD_BULK] =
+ pipeline_firewall_msg_req_add_bulk_handler,
+ [PIPELINE_FIREWALL_MSG_REQ_DEL_BULK] =
+ pipeline_firewall_msg_req_del_bulk_handler,
+ [PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT] =
+ pipeline_firewall_msg_req_add_default_handler,
+ [PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT] =
+ pipeline_firewall_msg_req_del_default_handler,
+};
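+
+/*
+ * Note: requests of type PIPELINE_MSG_REQ_CUSTOM are routed to
+ * pipeline_firewall_msg_req_custom_handler(), which dispatches them by
+ * subtype through a per-pipeline copy of the custom_handlers[] table above.
+ */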
+
+/*
+ * Firewall table
+ */
+struct firewall_table_entry {
+ struct rte_pipeline_table_entry head;
+};
+
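+/*
+ * ACL field definitions for the IPv4 5-tuple (protocol, source address,
+ * destination address, source and destination L4 port). The three tables
+ * below differ only in the fixed L2 offset placed before the IPv4 header:
+ * plain Ethernet, Ethernet plus a 4-byte VLAN tag, or Ethernet plus
+ * 8 bytes of QinQ tags.
+ */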
+static struct rte_acl_field_def field_format_ipv4[] = {
+ /* Protocol */
+ [0] = {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = 0,
+ .input_index = 0,
+ .offset = sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+
+ /* Source IP address (IPv4) */
+ [1] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 1,
+ .input_index = 1,
+ .offset = sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, src_addr),
+ },
+
+ /* Destination IP address (IPv4) */
+ [2] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 2,
+ .input_index = 2,
+ .offset = sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, dst_addr),
+ },
+
+ /* Source Port */
+ [3] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 3,
+ .input_index = 3,
+ .offset = sizeof(struct ether_hdr) +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, src_port),
+ },
+
+ /* Destination Port */
+ [4] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 4,
+ .input_index = 4,
+ .offset = sizeof(struct ether_hdr) +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, dst_port),
+ },
+};
+
+#define SIZEOF_VLAN_HDR 4
+
+static struct rte_acl_field_def field_format_vlan_ipv4[] = {
+ /* Protocol */
+ [0] = {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = 0,
+ .input_index = 0,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_VLAN_HDR +
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+
+ /* Source IP address (IPv4) */
+ [1] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 1,
+ .input_index = 1,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_VLAN_HDR +
+ offsetof(struct ipv4_hdr, src_addr),
+ },
+
+ /* Destination IP address (IPv4) */
+ [2] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 2,
+ .input_index = 2,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_VLAN_HDR +
+ offsetof(struct ipv4_hdr, dst_addr),
+ },
+
+ /* Source Port */
+ [3] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 3,
+ .input_index = 3,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_VLAN_HDR +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, src_port),
+ },
+
+ /* Destination Port */
+ [4] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 4,
+ .input_index = 4,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_VLAN_HDR +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, dst_port),
+ },
+};
+
+#define SIZEOF_QINQ_HEADER 8
+
+static struct rte_acl_field_def field_format_qinq_ipv4[] = {
+ /* Protocol */
+ [0] = {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = 0,
+ .input_index = 0,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_QINQ_HEADER +
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+
+ /* Source IP address (IPv4) */
+ [1] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 1,
+ .input_index = 1,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_QINQ_HEADER +
+ offsetof(struct ipv4_hdr, src_addr),
+ },
+
+ /* Destination IP address (IPv4) */
+ [2] = {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = 2,
+ .input_index = 2,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_QINQ_HEADER +
+ offsetof(struct ipv4_hdr, dst_addr),
+ },
+
+ /* Source Port */
+ [3] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 3,
+ .input_index = 3,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_QINQ_HEADER +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, src_port),
+ },
+
+ /* Destination Port */
+ [4] = {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = 4,
+ .input_index = 4,
+ .offset = sizeof(struct ether_hdr) +
+ SIZEOF_QINQ_HEADER +
+ sizeof(struct ipv4_hdr) +
+ offsetof(struct tcp_hdr, dst_port),
+ },
+};
+
+static int
+pipeline_firewall_parse_args(struct pipeline_firewall *p,
+ struct pipeline_params *params)
+{
+ uint32_t n_rules_present = 0;
+ uint32_t pkt_type_present = 0;
+ uint32_t i;
+
+ /* defaults */
+ p->n_rules = 4 * 1024;
+ p->n_rule_fields = RTE_DIM(field_format_ipv4);
+ p->field_format = field_format_ipv4;
+ p->field_format_size = sizeof(field_format_ipv4);
+
+ for (i = 0; i < params->n_args; i++) {
+ char *arg_name = params->args_name[i];
+ char *arg_value = params->args_value[i];
+
+ if (strcmp(arg_name, "n_rules") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_rules_present == 0, params->name,
+ arg_name);
+ n_rules_present = 1;
+
+ status = parser_read_uint32(&p->n_rules,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+ continue;
+ }
+
+ if (strcmp(arg_name, "pkt_type") == 0) {
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ pkt_type_present == 0, params->name,
+ arg_name);
+ pkt_type_present = 1;
+
+ /* ipv4 */
+ if (strcmp(arg_value, "ipv4") == 0) {
+ p->n_rule_fields = RTE_DIM(field_format_ipv4);
+ p->field_format = field_format_ipv4;
+ p->field_format_size =
+ sizeof(field_format_ipv4);
+ continue;
+ }
+
+ /* vlan_ipv4 */
+ if (strcmp(arg_value, "vlan_ipv4") == 0) {
+ p->n_rule_fields =
+ RTE_DIM(field_format_vlan_ipv4);
+ p->field_format = field_format_vlan_ipv4;
+ p->field_format_size =
+ sizeof(field_format_vlan_ipv4);
+ continue;
+ }
+
+ /* qinq_ipv4 */
+ if (strcmp(arg_value, "qinq_ipv4") == 0) {
+ p->n_rule_fields =
+ RTE_DIM(field_format_qinq_ipv4);
+ p->field_format = field_format_qinq_ipv4;
+ p->field_format_size =
+ sizeof(field_format_qinq_ipv4);
+ continue;
+ }
+
+ /* other */
+ PIPELINE_PARSE_ERR_INV_VAL(0, params->name,
+ arg_name, arg_value);
+ }
+
+ /* other */
+ PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name);
+ }
+
+ return 0;
+}
+
+static void *
+pipeline_firewall_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline *p;
+ struct pipeline_firewall *p_fw;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_firewall));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ p_fw = (struct pipeline_firewall *) p;
+ if (p == NULL)
+ return NULL;
+
+ strcpy(p->name, params->name);
+ p->log_level = params->log_level;
+
+ PLOG(p, HIGH, "Firewall");
+
+ /* Parse arguments */
+ if (pipeline_firewall_parse_args(p_fw, params)) {
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Pipeline */
+ {
+ struct rte_pipeline_params pipeline_params = {
+ .name = params->name,
+ .socket_id = params->socket_id,
+ .offset_port_id = 0,
+ };
+
+ p->p = rte_pipeline_create(&pipeline_params);
+ if (p->p == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Input ports */
+ p->n_ports_in = params->n_ports_in;
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_port_in_params port_params = {
+ .ops = pipeline_port_in_params_get_ops(
+ &params->port_in[i]),
+ .arg_create = pipeline_port_in_params_convert(
+ &params->port_in[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ .burst_size = params->port_in[i].burst_size,
+ };
+
+ int status = rte_pipeline_port_in_create(p->p,
+ &port_params,
+ &p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Output ports */
+ p->n_ports_out = params->n_ports_out;
+ for (i = 0; i < p->n_ports_out; i++) {
+ struct rte_pipeline_port_out_params port_params = {
+ .ops = pipeline_port_out_params_get_ops(
+ &params->port_out[i]),
+ .arg_create = pipeline_port_out_params_convert(
+ &params->port_out[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ };
+
+ int status = rte_pipeline_port_out_create(p->p,
+ &port_params,
+ &p->port_out_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Tables */
+ p->n_tables = 1;
+ {
+ struct rte_table_acl_params table_acl_params = {
+ .name = params->name,
+ .n_rules = p_fw->n_rules,
+ .n_rule_fields = p_fw->n_rule_fields,
+ };
+
+ struct rte_pipeline_table_params table_params = {
+ .ops = &rte_table_acl_ops,
+ .arg_create = &table_acl_params,
+ .f_action_hit = NULL,
+ .f_action_miss = NULL,
+ .arg_ah = NULL,
+ .action_data_size =
+ sizeof(struct firewall_table_entry) -
+ sizeof(struct rte_pipeline_table_entry),
+ };
+
+ int status;
+
+ memcpy(table_acl_params.field_format,
+ p_fw->field_format,
+ p_fw->field_format_size);
+
+ status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Connecting input ports to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_connect_to_table(p->p,
+ p->port_in_id[i],
+ p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Enable input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_enable(p->p,
+ p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Check pipeline consistency */
+ if (rte_pipeline_check(p->p) < 0) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Message queues */
+ p->n_msgq = params->n_msgq;
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_in[i] = params->msgq_in[i];
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_out[i] = params->msgq_out[i];
+
+ /* Message handlers */
+ memcpy(p->handlers, handlers, sizeof(p->handlers));
+ memcpy(p_fw->custom_handlers,
+ custom_handlers,
+ sizeof(p_fw->custom_handlers));
+
+ return p;
+}
+
+static int
+pipeline_firewall_free(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return 0;
+}
+
+static int
+pipeline_firewall_track(void *pipeline,
+ uint32_t port_in,
+ uint32_t *port_out)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if ((p == NULL) ||
+ (port_in >= p->n_ports_in) ||
+ (port_out == NULL))
+ return -1;
+
+ if (p->n_ports_in == 1) {
+ *port_out = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int
+pipeline_firewall_timer(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ pipeline_msg_req_handle(p);
+ rte_pipeline_flush(p->p);
+
+ return 0;
+}
+
+static void *
+pipeline_firewall_msg_req_custom_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_firewall *p_fw = (struct pipeline_firewall *) p;
+ struct pipeline_custom_msg_req *req = msg;
+ pipeline_msg_req_handler f_handle;
+
+ f_handle = (req->subtype < PIPELINE_FIREWALL_MSG_REQS) ?
+ p_fw->custom_handlers[req->subtype] :
+ pipeline_msg_req_invalid_handler;
+
+ if (f_handle == NULL)
+ f_handle = pipeline_msg_req_invalid_handler;
+
+ return f_handle(p, req);
+}
+
+static void *
+pipeline_firewall_msg_req_add_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_add_msg_req *req = msg;
+ struct pipeline_firewall_add_msg_rsp *rsp = msg;
+
+ struct rte_table_acl_rule_add_params params;
+ struct firewall_table_entry entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+ };
+
+ memset(&params, 0, sizeof(params));
+
+ switch (req->key.type) {
+ case PIPELINE_FIREWALL_IPV4_5TUPLE:
+ params.priority = req->priority;
+ params.field_value[0].value.u8 =
+ req->key.key.ipv4_5tuple.proto;
+ params.field_value[0].mask_range.u8 =
+ req->key.key.ipv4_5tuple.proto_mask;
+ params.field_value[1].value.u32 =
+ req->key.key.ipv4_5tuple.src_ip;
+ params.field_value[1].mask_range.u32 =
+ req->key.key.ipv4_5tuple.src_ip_mask;
+ params.field_value[2].value.u32 =
+ req->key.key.ipv4_5tuple.dst_ip;
+ params.field_value[2].mask_range.u32 =
+ req->key.key.ipv4_5tuple.dst_ip_mask;
+ params.field_value[3].value.u16 =
+ req->key.key.ipv4_5tuple.src_port_from;
+ params.field_value[3].mask_range.u16 =
+ req->key.key.ipv4_5tuple.src_port_to;
+ params.field_value[4].value.u16 =
+ req->key.key.ipv4_5tuple.dst_port_from;
+ params.field_value[4].mask_range.u16 =
+ req->key.key.ipv4_5tuple.dst_port_to;
+ break;
+
+ default:
+ rsp->status = -1; /* Error */
+ return rsp;
+ }
+
+ rsp->status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &params,
+ (struct rte_pipeline_table_entry *) &entry,
+ &rsp->key_found,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+static void *
+pipeline_firewall_msg_req_del_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_del_msg_req *req = msg;
+ struct pipeline_firewall_del_msg_rsp *rsp = msg;
+
+ struct rte_table_acl_rule_delete_params params;
+
+ memset(&params, 0, sizeof(params));
+
+ switch (req->key.type) {
+ case PIPELINE_FIREWALL_IPV4_5TUPLE:
+ params.field_value[0].value.u8 =
+ req->key.key.ipv4_5tuple.proto;
+ params.field_value[0].mask_range.u8 =
+ req->key.key.ipv4_5tuple.proto_mask;
+ params.field_value[1].value.u32 =
+ req->key.key.ipv4_5tuple.src_ip;
+ params.field_value[1].mask_range.u32 =
+ req->key.key.ipv4_5tuple.src_ip_mask;
+ params.field_value[2].value.u32 =
+ req->key.key.ipv4_5tuple.dst_ip;
+ params.field_value[2].mask_range.u32 =
+ req->key.key.ipv4_5tuple.dst_ip_mask;
+ params.field_value[3].value.u16 =
+ req->key.key.ipv4_5tuple.src_port_from;
+ params.field_value[3].mask_range.u16 =
+ req->key.key.ipv4_5tuple.src_port_to;
+ params.field_value[4].value.u16 =
+ req->key.key.ipv4_5tuple.dst_port_from;
+ params.field_value[4].mask_range.u16 =
+ req->key.key.ipv4_5tuple.dst_port_to;
+ break;
+
+ default:
+ rsp->status = -1; /* Error */
+ return rsp;
+ }
+
+ rsp->status = rte_pipeline_table_entry_delete(p->p,
+ p->table_id[0],
+ &params,
+ &rsp->key_found,
+ NULL);
+
+ return rsp;
+}
+
+static void *
+pipeline_firewall_msg_req_add_bulk_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_add_bulk_msg_req *req = msg;
+ struct pipeline_firewall_add_bulk_msg_rsp *rsp = msg;
+
+ struct rte_table_acl_rule_add_params *params[req->n_keys];
+ struct firewall_table_entry *entries[req->n_keys];
+
+ uint32_t i, n_keys;
+
+ n_keys = req->n_keys;
+
+ for (i = 0; i < n_keys; i++) {
+ entries[i] = rte_malloc(NULL,
+ sizeof(struct firewall_table_entry),
+ RTE_CACHE_LINE_SIZE);
+ params[i] = rte_malloc(NULL,
+ sizeof(struct rte_table_acl_rule_add_params),
+ RTE_CACHE_LINE_SIZE);
+ if ((entries[i] == NULL) || (params[i] == NULL)) {
+ uint32_t j;
+
+ /* Free everything allocated so far (rte_free(NULL) is a no-op) */
+ for (j = 0; j <= i; j++) {
+ rte_free(entries[j]);
+ rte_free(params[j]);
+ }
+
+ rsp->status = -1;
+ return rsp;
+ }
+
+ entries[i]->head.action = RTE_PIPELINE_ACTION_PORT;
+ entries[i]->head.port_id = p->port_out_id[req->port_ids[i]];
+
+ switch (req->keys[i].type) {
+ case PIPELINE_FIREWALL_IPV4_5TUPLE:
+ params[i]->priority = req->priorities[i];
+ params[i]->field_value[0].value.u8 =
+ req->keys[i].key.ipv4_5tuple.proto;
+ params[i]->field_value[0].mask_range.u8 =
+ req->keys[i].key.ipv4_5tuple.proto_mask;
+ params[i]->field_value[1].value.u32 =
+ req->keys[i].key.ipv4_5tuple.src_ip;
+ params[i]->field_value[1].mask_range.u32 =
+ req->keys[i].key.ipv4_5tuple.src_ip_mask;
+ params[i]->field_value[2].value.u32 =
+ req->keys[i].key.ipv4_5tuple.dst_ip;
+ params[i]->field_value[2].mask_range.u32 =
+ req->keys[i].key.ipv4_5tuple.dst_ip_mask;
+ params[i]->field_value[3].value.u16 =
+ req->keys[i].key.ipv4_5tuple.src_port_from;
+ params[i]->field_value[3].mask_range.u16 =
+ req->keys[i].key.ipv4_5tuple.src_port_to;
+ params[i]->field_value[4].value.u16 =
+ req->keys[i].key.ipv4_5tuple.dst_port_from;
+ params[i]->field_value[4].mask_range.u16 =
+ req->keys[i].key.ipv4_5tuple.dst_port_to;
+ break;
+
+ default: {
+ uint32_t j;
+
+ rsp->status = -1; /* Error */
+
+ /* Free only the entries allocated so far */
+ for (j = 0; j <= i; j++) {
+ rte_free(entries[j]);
+ rte_free(params[j]);
+ }
+
+ return rsp;
+ }
+ }
+ }
+
+ rsp->status = rte_pipeline_table_entry_add_bulk(p->p, p->table_id[0],
+ (void *)params, (struct rte_pipeline_table_entry **)entries,
+ n_keys, req->keys_found,
+ (struct rte_pipeline_table_entry **)req->entries_ptr);
+
+ for (i = 0; i < n_keys; i++) {
+ rte_free(entries[i]);
+ rte_free(params[i]);
+ }
+
+ return rsp;
+}
+
+static void *
+pipeline_firewall_msg_req_del_bulk_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_del_bulk_msg_req *req = msg;
+ struct pipeline_firewall_del_bulk_msg_rsp *rsp = msg;
+
+ struct rte_table_acl_rule_delete_params *params[req->n_keys];
+
+ uint32_t i, n_keys;
+
+ n_keys = req->n_keys;
+
+ for (i = 0; i < n_keys; i++) {
+ params[i] = rte_malloc(NULL,
+ sizeof(struct rte_table_acl_rule_delete_params),
+ RTE_CACHE_LINE_SIZE);
+ if (params[i] == NULL) {
+ uint32_t j;
+
+ /* Free what has been allocated so far */
+ for (j = 0; j < i; j++)
+ rte_free(params[j]);
+
+ rsp->status = -1;
+ return rsp;
+ }
+
+ switch (req->keys[i].type) {
+ case PIPELINE_FIREWALL_IPV4_5TUPLE:
+ params[i]->field_value[0].value.u8 =
+ req->keys[i].key.ipv4_5tuple.proto;
+ params[i]->field_value[0].mask_range.u8 =
+ req->keys[i].key.ipv4_5tuple.proto_mask;
+ params[i]->field_value[1].value.u32 =
+ req->keys[i].key.ipv4_5tuple.src_ip;
+ params[i]->field_value[1].mask_range.u32 =
+ req->keys[i].key.ipv4_5tuple.src_ip_mask;
+ params[i]->field_value[2].value.u32 =
+ req->keys[i].key.ipv4_5tuple.dst_ip;
+ params[i]->field_value[2].mask_range.u32 =
+ req->keys[i].key.ipv4_5tuple.dst_ip_mask;
+ params[i]->field_value[3].value.u16 =
+ req->keys[i].key.ipv4_5tuple.src_port_from;
+ params[i]->field_value[3].mask_range.u16 =
+ req->keys[i].key.ipv4_5tuple.src_port_to;
+ params[i]->field_value[4].value.u16 =
+ req->keys[i].key.ipv4_5tuple.dst_port_from;
+ params[i]->field_value[4].mask_range.u16 =
+ req->keys[i].key.ipv4_5tuple.dst_port_to;
+ break;
+
+ default: {
+ uint32_t j;
+
+ rsp->status = -1; /* Error */
+
+ /* Free only the entries allocated so far */
+ for (j = 0; j <= i; j++)
+ rte_free(params[j]);
+
+ return rsp;
+ }
+ }
+ }
+
+ rsp->status = rte_pipeline_table_entry_delete_bulk(p->p, p->table_id[0],
+ (void **)&params, n_keys, req->keys_found, NULL);
+
+ for (i = 0; i < n_keys; i++)
+ rte_free(params[i]);
+
+ return rsp;
+}
+
+static void *
+pipeline_firewall_msg_req_add_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_add_default_msg_req *req = msg;
+ struct pipeline_firewall_add_default_msg_rsp *rsp = msg;
+
+ struct firewall_table_entry default_entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+ };
+
+ rsp->status = rte_pipeline_table_default_entry_add(p->p,
+ p->table_id[0],
+ (struct rte_pipeline_table_entry *) &default_entry,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+static void *
+pipeline_firewall_msg_req_del_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_firewall_del_default_msg_rsp *rsp = msg;
+
+ rsp->status = rte_pipeline_table_default_entry_delete(p->p,
+ p->table_id[0],
+ NULL);
+
+ return rsp;
+}
+
+struct pipeline_be_ops pipeline_firewall_be_ops = {
+ .f_init = pipeline_firewall_init,
+ .f_free = pipeline_firewall_free,
+ .f_run = NULL,
+ .f_timer = pipeline_firewall_timer,
+ .f_track = pipeline_firewall_track,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_firewall_be.h b/examples/ip_pipeline/pipeline/pipeline_firewall_be.h
new file mode 100644
index 00000000..f5b0522f
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_firewall_be.h
@@ -0,0 +1,176 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FIREWALL_BE_H__
+#define __INCLUDE_PIPELINE_FIREWALL_BE_H__
+
+#include "pipeline_common_be.h"
+
+enum pipeline_firewall_key_type {
+ PIPELINE_FIREWALL_IPV4_5TUPLE,
+};
+
+struct pipeline_firewall_key_ipv4_5tuple {
+ uint32_t src_ip;
+ uint32_t src_ip_mask;
+ uint32_t dst_ip;
+ uint32_t dst_ip_mask;
+ uint16_t src_port_from;
+ uint16_t src_port_to;
+ uint16_t dst_port_from;
+ uint16_t dst_port_to;
+ uint8_t proto;
+ uint8_t proto_mask;
+};
+
+struct pipeline_firewall_key {
+ enum pipeline_firewall_key_type type;
+ union {
+ struct pipeline_firewall_key_ipv4_5tuple ipv4_5tuple;
+ } key;
+};
+
+enum pipeline_firewall_msg_req_type {
+ PIPELINE_FIREWALL_MSG_REQ_ADD = 0,
+ PIPELINE_FIREWALL_MSG_REQ_DEL,
+ PIPELINE_FIREWALL_MSG_REQ_ADD_BULK,
+ PIPELINE_FIREWALL_MSG_REQ_DEL_BULK,
+ PIPELINE_FIREWALL_MSG_REQ_ADD_DEFAULT,
+ PIPELINE_FIREWALL_MSG_REQ_DEL_DEFAULT,
+ PIPELINE_FIREWALL_MSG_REQS
+};
+
+/*
+ * MSG ADD
+ */
+struct pipeline_firewall_add_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_firewall_key key;
+
+ /* data */
+ int32_t priority;
+ uint32_t port_id;
+};
+
+struct pipeline_firewall_add_msg_rsp {
+ int status;
+ int key_found;
+ void *entry_ptr;
+};
+
+/*
+ * MSG DEL
+ */
+struct pipeline_firewall_del_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_firewall_key key;
+};
+
+struct pipeline_firewall_del_msg_rsp {
+ int status;
+ int key_found;
+};
+
+/*
+ * MSG ADD BULK
+ */
+struct pipeline_firewall_add_bulk_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+
+ struct pipeline_firewall_key *keys;
+ uint32_t n_keys;
+
+ uint32_t *priorities;
+ uint32_t *port_ids;
+ int *keys_found;
+ void **entries_ptr;
+};
+
+struct pipeline_firewall_add_bulk_msg_rsp {
+ int status;
+};
+
+/*
+ * MSG DEL BULK
+ */
+struct pipeline_firewall_del_bulk_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_firewall_key *keys;
+ uint32_t n_keys;
+ int *keys_found;
+};
+
+struct pipeline_firewall_del_bulk_msg_rsp {
+ int status;
+};
+
+/*
+ * MSG ADD DEFAULT
+ */
+struct pipeline_firewall_add_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+
+ /* data */
+ uint32_t port_id;
+};
+
+struct pipeline_firewall_add_default_msg_rsp {
+ int status;
+ void *entry_ptr;
+};
+
+/*
+ * MSG DEL DEFAULT
+ */
+struct pipeline_firewall_del_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_firewall_msg_req_type subtype;
+};
+
+struct pipeline_firewall_del_default_msg_rsp {
+ int status;
+};
+
+extern struct pipeline_be_ops pipeline_firewall_be_ops;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions.c b/examples/ip_pipeline/pipeline/pipeline_flow_actions.c
new file mode 100644
index 00000000..4012121f
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions.c
@@ -0,0 +1,1814 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#include <rte_common.h>
+#include <rte_hexdump.h>
+#include <rte_malloc.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "app.h"
+#include "pipeline_common_fe.h"
+#include "pipeline_flow_actions.h"
+#include "hash_func.h"
+
+/*
+ * Flow actions pipeline
+ */
+#ifndef N_FLOWS_BULK
+#define N_FLOWS_BULK 4096
+#endif
+
+struct app_pipeline_fa_flow {
+ struct pipeline_fa_flow_params params;
+ void *entry_ptr;
+};
+
+struct app_pipeline_fa_dscp {
+ uint32_t traffic_class;
+ enum rte_meter_color color;
+};
+
+struct app_pipeline_fa {
+ /* Parameters */
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+ struct pipeline_fa_params params;
+
+ /* Flows */
+ struct app_pipeline_fa_dscp dscp[PIPELINE_FA_N_DSCP];
+ struct app_pipeline_fa_flow *flows;
+} __rte_cache_aligned;
+
+static void*
+app_pipeline_fa_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct app_pipeline_fa *p;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_fa));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+
+ /* Initialization */
+ p->n_ports_in = params->n_ports_in;
+ p->n_ports_out = params->n_ports_out;
+ if (pipeline_fa_parse_args(&p->params, params)) {
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(
+ p->params.n_flows * sizeof(struct app_pipeline_fa_flow));
+ p->flows = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p->flows == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Initialization of flow table */
+ for (i = 0; i < p->params.n_flows; i++)
+ pipeline_fa_flow_params_set_default(&p->flows[i].params);
+
+ /* Initialization of DSCP table */
+ for (i = 0; i < RTE_DIM(p->dscp); i++) {
+ p->dscp[i].traffic_class = 0;
+ p->dscp[i].color = e_RTE_METER_GREEN;
+ }
+
+ return (void *) p;
+}
+
+static int
+app_pipeline_fa_free(void *pipeline)
+{
+ struct app_pipeline_fa *p = pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_free(p->flows);
+ rte_free(p);
+
+ return 0;
+}
+
+static int
+flow_params_check(struct app_pipeline_fa *p,
+ __rte_unused uint32_t meter_update_mask,
+ uint32_t policer_update_mask,
+ uint32_t port_update,
+ struct pipeline_fa_flow_params *params)
+{
+ uint32_t mask, i;
+
+ /* Meter */
+
+ /* Policer */
+ for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) {
+ struct pipeline_fa_policer_params *policer = &params->p[i];
+ uint32_t j;
+
+ if ((mask & policer_update_mask) == 0)
+ continue;
+
+ for (j = 0; j < e_RTE_METER_COLORS; j++) {
+ struct pipeline_fa_policer_action *action =
+ &policer->action[j];
+
+ if ((action->drop == 0) &&
+ (action->color >= e_RTE_METER_COLORS))
+ return -1;
+ }
+ }
+
+ /* Port */
+ if (port_update && (params->port_id >= p->n_ports_out))
+ return -1;
+
+ return 0;
+}
+
+int
+app_pipeline_fa_flow_config(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t flow_id,
+ uint32_t meter_update_mask,
+ uint32_t policer_update_mask,
+ uint32_t port_update,
+ struct pipeline_fa_flow_params *params)
+{
+ struct app_pipeline_fa *p;
+ struct app_pipeline_fa_flow *flow;
+
+ struct pipeline_fa_flow_config_msg_req *req;
+ struct pipeline_fa_flow_config_msg_rsp *rsp;
+
+ uint32_t i, mask;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ ((meter_update_mask == 0) &&
+ (policer_update_mask == 0) &&
+ (port_update == 0)) ||
+ (meter_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) ||
+ (policer_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) ||
+ (params == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ if (flow_params_check(p,
+ meter_update_mask,
+ policer_update_mask,
+ port_update,
+ params) != 0)
+ return -1;
+
+ flow_id %= p->params.n_flows;
+ flow = &p->flows[flow_id];
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FA_MSG_REQ_FLOW_CONFIG;
+ req->entry_ptr = flow->entry_ptr;
+ req->flow_id = flow_id;
+ req->meter_update_mask = meter_update_mask;
+ req->policer_update_mask = policer_update_mask;
+ req->port_update = port_update;
+ memcpy(&req->params, params, sizeof(*params));
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status ||
+ (rsp->entry_ptr == NULL)) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Commit flow */
+ for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) {
+ if ((mask & meter_update_mask) == 0)
+ continue;
+
+ memcpy(&flow->params.m[i], &params->m[i], sizeof(params->m[i]));
+ }
+
+ for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) {
+ if ((mask & policer_update_mask) == 0)
+ continue;
+
+ memcpy(&flow->params.p[i], &params->p[i], sizeof(params->p[i]));
+ }
+
+ if (port_update)
+ flow->params.port_id = params->port_id;
+
+ flow->entry_ptr = rsp->entry_ptr;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_fa_flow_config_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t *flow_id,
+ uint32_t n_flows,
+ uint32_t meter_update_mask,
+ uint32_t policer_update_mask,
+ uint32_t port_update,
+ struct pipeline_fa_flow_params *params)
+{
+ struct app_pipeline_fa *p;
+ struct pipeline_fa_flow_config_bulk_msg_req *req;
+ struct pipeline_fa_flow_config_bulk_msg_rsp *rsp;
+ void **req_entry_ptr;
+ uint32_t *req_flow_id;
+ uint32_t i;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (flow_id == NULL) ||
+ (n_flows == 0) ||
+ ((meter_update_mask == 0) &&
+ (policer_update_mask == 0) &&
+ (port_update == 0)) ||
+ (meter_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) ||
+ (policer_update_mask >= (1 << PIPELINE_FA_N_TC_MAX)) ||
+ (params == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ for (i = 0; i < n_flows; i++) {
+ struct pipeline_fa_flow_params *flow_params = &params[i];
+
+ if (flow_params_check(p,
+ meter_update_mask,
+ policer_update_mask,
+ port_update,
+ flow_params) != 0)
+ return -1;
+ }
+
+ /* Allocate and write request */
+ req_entry_ptr = (void **) rte_malloc(NULL,
+ n_flows * sizeof(void *),
+ RTE_CACHE_LINE_SIZE);
+ if (req_entry_ptr == NULL)
+ return -1;
+
+ req_flow_id = (uint32_t *) rte_malloc(NULL,
+ n_flows * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (req_flow_id == NULL) {
+ rte_free(req_entry_ptr);
+ return -1;
+ }
+
+ for (i = 0; i < n_flows; i++) {
+ uint32_t fid = flow_id[i] % p->params.n_flows;
+ struct app_pipeline_fa_flow *flow = &p->flows[fid];
+
+ req_flow_id[i] = fid;
+ req_entry_ptr[i] = flow->entry_ptr;
+ }
+
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ rte_free(req_flow_id);
+ rte_free(req_entry_ptr);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK;
+ req->entry_ptr = req_entry_ptr;
+ req->flow_id = req_flow_id;
+ req->n_flows = n_flows;
+ req->meter_update_mask = meter_update_mask;
+ req->policer_update_mask = policer_update_mask;
+ req->port_update = port_update;
+ req->params = params;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ rte_free(req_flow_id);
+ rte_free(req_entry_ptr);
+ return -1;
+ }
+
+ /* Read response */
+
+ /* Commit flows */
+ for (i = 0; i < rsp->n_flows; i++) {
+ uint32_t fid = flow_id[i] % p->params.n_flows;
+ struct app_pipeline_fa_flow *flow = &p->flows[fid];
+ struct pipeline_fa_flow_params *flow_params = &params[i];
+ void *entry_ptr = req_entry_ptr[i];
+ uint32_t j, mask;
+
+ for (j = 0, mask = 1; j < PIPELINE_FA_N_TC_MAX;
+ j++, mask <<= 1) {
+ if ((mask & meter_update_mask) == 0)
+ continue;
+
+ memcpy(&flow->params.m[j],
+ &flow_params->m[j],
+ sizeof(flow_params->m[j]));
+ }
+
+ for (j = 0, mask = 1; j < PIPELINE_FA_N_TC_MAX;
+ j++, mask <<= 1) {
+ if ((mask & policer_update_mask) == 0)
+ continue;
+
+ memcpy(&flow->params.p[j],
+ &flow_params->p[j],
+ sizeof(flow_params->p[j]));
+ }
+
+ if (port_update)
+ flow->params.port_id = flow_params->port_id;
+
+ flow->entry_ptr = entry_ptr;
+ }
+
+ /* Save the result before freeing the response (avoid use after free) */
+ i = rsp->n_flows;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+ rte_free(req_flow_id);
+ rte_free(req_entry_ptr);
+
+ return (i == n_flows) ? 0 : -1;
+}
+
+int
+app_pipeline_fa_dscp_config(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t dscp,
+ uint32_t traffic_class,
+ enum rte_meter_color color)
+{
+ struct app_pipeline_fa *p;
+
+ struct pipeline_fa_dscp_config_msg_req *req;
+ struct pipeline_fa_dscp_config_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (dscp >= PIPELINE_FA_N_DSCP) ||
+ (traffic_class >= PIPELINE_FA_N_TC_MAX) ||
+ (color >= e_RTE_METER_COLORS))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ if (p->params.dscp_enabled == 0)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FA_MSG_REQ_DSCP_CONFIG;
+ req->dscp = dscp;
+ req->traffic_class = traffic_class;
+ req->color = color;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Commit DSCP */
+ p->dscp[dscp].traffic_class = traffic_class;
+ p->dscp[dscp].color = color;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_fa_flow_policer_stats_read(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t flow_id,
+ uint32_t policer_id,
+ int clear,
+ struct pipeline_fa_policer_stats *stats)
+{
+ struct app_pipeline_fa *p;
+ struct app_pipeline_fa_flow *flow;
+
+ struct pipeline_fa_policer_stats_msg_req *req;
+ struct pipeline_fa_policer_stats_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if ((app == NULL) || (stats == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ flow_id %= p->params.n_flows;
+ flow = &p->flows[flow_id];
+
+ if ((policer_id >= p->params.n_meters_per_flow) ||
+ (flow->entry_ptr == NULL))
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FA_MSG_REQ_POLICER_STATS_READ;
+ req->entry_ptr = flow->entry_ptr;
+ req->policer_id = policer_id;
+ req->clear = clear;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ memcpy(stats, &rsp->stats, sizeof(*stats));
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+static const char *
+color_to_string(enum rte_meter_color color)
+{
+ switch (color) {
+ case e_RTE_METER_GREEN: return "G";
+ case e_RTE_METER_YELLOW: return "Y";
+ case e_RTE_METER_RED: return "R";
+ default: return "?";
+ }
+}
+
+static int
+string_to_color(char *s, enum rte_meter_color *c)
+{
+ if (strcmp(s, "G") == 0) {
+ *c = e_RTE_METER_GREEN;
+ return 0;
+ }
+
+ if (strcmp(s, "Y") == 0) {
+ *c = e_RTE_METER_YELLOW;
+ return 0;
+ }
+
+ if (strcmp(s, "R") == 0) {
+ *c = e_RTE_METER_RED;
+ return 0;
+ }
+
+ return -1;
+}
+
+static const char *
+policer_action_to_string(struct pipeline_fa_policer_action *a)
+{
+ if (a->drop)
+ return "D";
+
+ return color_to_string(a->color);
+}
+
+static int
+string_to_policer_action(char *s, struct pipeline_fa_policer_action *a)
+{
+ if (strcmp(s, "G") == 0) {
+ a->drop = 0;
+ a->color = e_RTE_METER_GREEN;
+ return 0;
+ }
+
+ if (strcmp(s, "Y") == 0) {
+ a->drop = 0;
+ a->color = e_RTE_METER_YELLOW;
+ return 0;
+ }
+
+ if (strcmp(s, "R") == 0) {
+ a->drop = 0;
+ a->color = e_RTE_METER_RED;
+ return 0;
+ }
+
+ if (strcmp(s, "D") == 0) {
+ a->drop = 1;
+ a->color = e_RTE_METER_GREEN;
+ return 0;
+ }
+
+ return -1;
+}
+
+static void
+print_flow(struct app_pipeline_fa *p,
+ uint32_t flow_id,
+ struct app_pipeline_fa_flow *flow)
+{
+ uint32_t i;
+
+ printf("Flow ID = %" PRIu32 "\n", flow_id);
+
+ for (i = 0; i < p->params.n_meters_per_flow; i++) {
+ struct rte_meter_trtcm_params *meter = &flow->params.m[i];
+ struct pipeline_fa_policer_params *policer = &flow->params.p[i];
+
+ printf("\ttrTCM [CIR = %" PRIu64
+ ", CBS = %" PRIu64 ", PIR = %" PRIu64
+ ", PBS = %" PRIu64 "] Policer [G : %s, Y : %s, R : %s]\n",
+ meter->cir,
+ meter->cbs,
+ meter->pir,
+ meter->pbs,
+ policer_action_to_string(&policer->action[e_RTE_METER_GREEN]),
+ policer_action_to_string(&policer->action[e_RTE_METER_YELLOW]),
+ policer_action_to_string(&policer->action[e_RTE_METER_RED]));
+ }
+
+ printf("\tPort %u (entry_ptr = %p)\n",
+ flow->params.port_id,
+ flow->entry_ptr);
+}
+
+
+static int
+app_pipeline_fa_flow_ls(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_fa *p;
+ uint32_t i;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ for (i = 0; i < p->params.n_flows; i++) {
+ struct app_pipeline_fa_flow *flow = &p->flows[i];
+
+ print_flow(p, i, flow);
+ }
+
+ return 0;
+}
+
+static int
+app_pipeline_fa_dscp_ls(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_fa *p;
+ uint32_t i;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id,
+ &pipeline_flow_actions);
+ if (p == NULL)
+ return -1;
+
+ if (p->params.dscp_enabled == 0)
+ return -1;
+
+ for (i = 0; i < RTE_DIM(p->dscp); i++) {
+ struct app_pipeline_fa_dscp *dscp = &p->dscp[i];
+
+ printf("DSCP = %2" PRIu32 ": Traffic class = %" PRIu32
+ ", Color = %s\n",
+ i,
+ dscp->traffic_class,
+ color_to_string(dscp->color));
+ }
+
+ return 0;
+}
+
+/*
+ * Flow meter configuration (single flow)
+ *
+ * p <pipeline ID> flow <flow ID> meter <meter ID> trtcm <trtcm params>
+ */
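+
+/*
+ * The four trtcm parameters are read in the order CIR, PIR, CBS, PBS
+ * (see the token list below). An illustrative invocation, with made-up
+ * example values rather than values taken from this patch:
+ *
+ *    p 1 flow 0 meter 0 trtcm 1250000 1250000 1000 1000
+ */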
+
+struct cmd_fa_meter_config_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ uint32_t flow_id;
+ cmdline_fixed_string_t meter_string;
+ uint32_t meter_id;
+ cmdline_fixed_string_t trtcm_string;
+ uint64_t cir;
+ uint64_t pir;
+ uint64_t cbs;
+ uint64_t pbs;
+};
+
+static void
+cmd_fa_meter_config_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_meter_config_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params flow_params;
+ int status;
+
+ if (params->meter_id >= PIPELINE_FA_N_TC_MAX) {
+ printf("Command failed\n");
+ return;
+ }
+
+ flow_params.m[params->meter_id].cir = params->cir;
+ flow_params.m[params->meter_id].pir = params->pir;
+ flow_params.m[params->meter_id].cbs = params->cbs;
+ flow_params.m[params->meter_id].pbs = params->pbs;
+
+ status = app_pipeline_fa_flow_config(app,
+ params->pipeline_id,
+ params->flow_id,
+ 1 << params->meter_id,
+ 0,
+ 0,
+ &flow_params);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fa_meter_config_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result,
+ flow_string, "flow");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result,
+ flow_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_meter_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result,
+ meter_string, "meter");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_meter_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result,
+ meter_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_trtcm_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_result,
+ trtcm_string, "trtcm");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_cir =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, cir, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_pir =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, pir, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_cbs =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, cbs, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_pbs =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_result, pbs, UINT64);
+
+cmdline_parse_inst_t cmd_fa_meter_config = {
+ .f = cmd_fa_meter_config_parsed,
+ .data = NULL,
+ .help_str = "Flow meter configuration (single flow) ",
+ .tokens = {
+ (void *) &cmd_fa_meter_config_p_string,
+ (void *) &cmd_fa_meter_config_pipeline_id,
+ (void *) &cmd_fa_meter_config_flow_string,
+ (void *) &cmd_fa_meter_config_flow_id,
+ (void *) &cmd_fa_meter_config_meter_string,
+ (void *) &cmd_fa_meter_config_meter_id,
+ (void *) &cmd_fa_meter_config_trtcm_string,
+ (void *) &cmd_fa_meter_config_cir,
+ (void *) &cmd_fa_meter_config_pir,
+ (void *) &cmd_fa_meter_config_cbs,
+ (void *) &cmd_fa_meter_config_pbs,
+ NULL,
+ },
+};
+
+/*
+ * Flow meter configuration (multiple flows)
+ *
+ * p <pipeline ID> flows <n_flows> meter <meter ID> trtcm <trtcm params>
+ */
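+
+/*
+ * The same trtcm parameters are applied to flow IDs 0 .. n_flows - 1; the
+ * handler below pushes them to the pipeline in batches of N_FLOWS_BULK
+ * flows. Illustrative invocation (example values only):
+ *
+ *    p 1 flows 4096 meter 0 trtcm 1250000 1250000 1000 1000
+ */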
+
+struct cmd_fa_meter_config_bulk_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flows_string;
+ uint32_t n_flows;
+ cmdline_fixed_string_t meter_string;
+ uint32_t meter_id;
+ cmdline_fixed_string_t trtcm_string;
+ uint64_t cir;
+ uint64_t pir;
+ uint64_t cbs;
+ uint64_t pbs;
+};
+
+static void
+cmd_fa_meter_config_bulk_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_meter_config_bulk_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params flow_template, *flow_params;
+ uint32_t *flow_id;
+ uint32_t i;
+
+ if ((params->n_flows == 0) ||
+ (params->meter_id >= PIPELINE_FA_N_TC_MAX)) {
+ printf("Invalid arguments\n");
+ return;
+ }
+
+ flow_id = (uint32_t *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_params == NULL) {
+ rte_free(flow_id);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ memset(&flow_template, 0, sizeof(flow_template));
+ flow_template.m[params->meter_id].cir = params->cir;
+ flow_template.m[params->meter_id].pir = params->pir;
+ flow_template.m[params->meter_id].cbs = params->cbs;
+ flow_template.m[params->meter_id].pbs = params->pbs;
+
+ for (i = 0; i < params->n_flows; i++) {
+ uint32_t pos = i % N_FLOWS_BULK;
+
+ flow_id[pos] = i;
+ memcpy(&flow_params[pos],
+ &flow_template,
+ sizeof(flow_template));
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (i == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fa_flow_config_bulk(app,
+ params->pipeline_id,
+ flow_id,
+ pos + 1,
+ 1 << params->meter_id,
+ 0,
+ 0,
+ flow_params);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ rte_free(flow_params);
+ rte_free(flow_id);
+
+}
+
+cmdline_parse_token_string_t cmd_fa_meter_config_bulk_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_bulk_flows_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ flows_string, "flows");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ n_flows, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_bulk_meter_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ meter_string, "meter");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_meter_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ meter_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_meter_config_bulk_trtcm_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ trtcm_string, "trtcm");
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_cir =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ cir, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pir =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ pir, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_cbs =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ cbs, UINT64);
+
+cmdline_parse_token_num_t cmd_fa_meter_config_bulk_pbs =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_meter_config_bulk_result,
+ pbs, UINT64);
+
+cmdline_parse_inst_t cmd_fa_meter_config_bulk = {
+ .f = cmd_fa_meter_config_bulk_parsed,
+ .data = NULL,
+ .help_str = "Flow meter configuration (multiple flows)",
+ .tokens = {
+ (void *) &cmd_fa_meter_config_bulk_p_string,
+ (void *) &cmd_fa_meter_config_bulk_pipeline_id,
+ (void *) &cmd_fa_meter_config_bulk_flows_string,
+ (void *) &cmd_fa_meter_config_bulk_n_flows,
+ (void *) &cmd_fa_meter_config_bulk_meter_string,
+ (void *) &cmd_fa_meter_config_bulk_meter_id,
+ (void *) &cmd_fa_meter_config_bulk_trtcm_string,
+ (void *) &cmd_fa_meter_config_bulk_cir,
+ (void *) &cmd_fa_meter_config_bulk_pir,
+ (void *) &cmd_fa_meter_config_bulk_cbs,
+ (void *) &cmd_fa_meter_config_bulk_pbs,
+ NULL,
+ },
+};
+
+/*
+ * Flow policer configuration (single flow)
+ *
+ * p <pipeline ID> flow <flow ID> policer <policer ID>
+ * G <action> Y <action> R <action>
+ *
+ * <action> = G (green) | Y (yellow) | R (red) | D (drop)
+ */
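+
+/*
+ * Illustrative invocation (example values only): keep green and yellow
+ * packets with their color and drop red packets for policer 0 of flow 0
+ * in pipeline 1:
+ *
+ *    p 1 flow 0 policer 0 G G Y Y R D
+ */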
+
+struct cmd_fa_policer_config_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ uint32_t flow_id;
+ cmdline_fixed_string_t policer_string;
+ uint32_t policer_id;
+ cmdline_fixed_string_t green_string;
+ cmdline_fixed_string_t g_action;
+ cmdline_fixed_string_t yellow_string;
+ cmdline_fixed_string_t y_action;
+ cmdline_fixed_string_t red_string;
+ cmdline_fixed_string_t r_action;
+};
+
+static void
+cmd_fa_policer_config_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_policer_config_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params flow_params;
+ int status;
+
+ if (params->policer_id >= PIPELINE_FA_N_TC_MAX) {
+ printf("Command failed\n");
+ return;
+ }
+
+ status = string_to_policer_action(params->g_action,
+ &flow_params.p[params->policer_id].action[e_RTE_METER_GREEN]);
+ if (status)
+ printf("Invalid policer green action\n");
+
+ status = string_to_policer_action(params->y_action,
+ &flow_params.p[params->policer_id].action[e_RTE_METER_YELLOW]);
+ if (status)
+ printf("Invalid policer yellow action\n");
+
+ status = string_to_policer_action(params->r_action,
+ &flow_params.p[params->policer_id].action[e_RTE_METER_RED]);
+ if (status)
+ printf("Invalid policer red action\n");
+
+ status = app_pipeline_fa_flow_config(app,
+ params->pipeline_id,
+ params->flow_id,
+ 0,
+ 1 << params->policer_id,
+ 0,
+ &flow_params);
+
+ if (status != 0)
+ printf("Command failed\n");
+
+}
+
+cmdline_parse_token_string_t cmd_fa_policer_config_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ flow_string, "flow");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result,
+ flow_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_policer_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ policer_string, "policer");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_policer_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_result,
+ policer_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_green_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ green_string, "G");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_g_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ g_action, "R#Y#G#D");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_yellow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ yellow_string, "Y");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_y_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ y_action, "R#Y#G#D");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_red_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ red_string, "R");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_r_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_result,
+ r_action, "R#Y#G#D");
+
+cmdline_parse_inst_t cmd_fa_policer_config = {
+ .f = cmd_fa_policer_config_parsed,
+ .data = NULL,
+ .help_str = "Flow policer configuration (single flow)",
+ .tokens = {
+ (void *) &cmd_fa_policer_config_p_string,
+ (void *) &cmd_fa_policer_config_pipeline_id,
+ (void *) &cmd_fa_policer_config_flow_string,
+ (void *) &cmd_fa_policer_config_flow_id,
+ (void *) &cmd_fa_policer_config_policer_string,
+ (void *) &cmd_fa_policer_config_policer_id,
+ (void *) &cmd_fa_policer_config_green_string,
+ (void *) &cmd_fa_policer_config_g_action,
+ (void *) &cmd_fa_policer_config_yellow_string,
+ (void *) &cmd_fa_policer_config_y_action,
+ (void *) &cmd_fa_policer_config_red_string,
+ (void *) &cmd_fa_policer_config_r_action,
+ NULL,
+ },
+};
+
+/*
+ * Flow policer configuration (multiple flows)
+ *
+ * p <pipeline ID> flows <n_flows> policer <policer ID>
+ * G <action> Y <action> R <action>
+ *
+ * <action> = G (green) | Y (yellow) | R (red) | D (drop)
+ */
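+
+/*
+ * Applies the same policer actions to flow IDs 0 .. n_flows - 1, in
+ * batches of N_FLOWS_BULK flows. Illustrative invocation (example
+ * values only):
+ *
+ *    p 1 flows 4096 policer 0 G G Y Y R D
+ */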
+
+struct cmd_fa_policer_config_bulk_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flows_string;
+ uint32_t n_flows;
+ cmdline_fixed_string_t policer_string;
+ uint32_t policer_id;
+ cmdline_fixed_string_t green_string;
+ cmdline_fixed_string_t g_action;
+ cmdline_fixed_string_t yellow_string;
+ cmdline_fixed_string_t y_action;
+ cmdline_fixed_string_t red_string;
+ cmdline_fixed_string_t r_action;
+};
+
+static void
+cmd_fa_policer_config_bulk_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_policer_config_bulk_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params flow_template, *flow_params;
+ uint32_t *flow_id, i;
+ int status;
+
+ if ((params->n_flows == 0) ||
+ (params->policer_id >= PIPELINE_FA_N_TC_MAX)) {
+ printf("Invalid arguments\n");
+ return;
+ }
+
+ flow_id = (uint32_t *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_params == NULL) {
+ rte_free(flow_id);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ memset(&flow_template, 0, sizeof(flow_template));
+
+ status = string_to_policer_action(params->g_action,
+ &flow_template.p[params->policer_id].action[e_RTE_METER_GREEN]);
+ if (status)
+ printf("Invalid policer green action\n");
+
+ status = string_to_policer_action(params->y_action,
+ &flow_template.p[params->policer_id].action[e_RTE_METER_YELLOW]);
+ if (status)
+ printf("Invalid policer yellow action\n");
+
+ status = string_to_policer_action(params->r_action,
+ &flow_template.p[params->policer_id].action[e_RTE_METER_RED]);
+ if (status)
+ printf("Invalid policer red action\n");
+
+ for (i = 0; i < params->n_flows; i++) {
+ uint32_t pos = i % N_FLOWS_BULK;
+
+ flow_id[pos] = i;
+ memcpy(&flow_params[pos], &flow_template,
+ sizeof(flow_template));
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (i == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fa_flow_config_bulk(app,
+ params->pipeline_id,
+ flow_id,
+ pos + 1,
+ 0,
+ 1 << params->policer_id,
+ 0,
+ flow_params);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ rte_free(flow_params);
+ rte_free(flow_id);
+
+}
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_bulk_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_flows_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ flows_string, "flows");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_bulk_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ n_flows, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_policer_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ policer_string, "policer");
+
+cmdline_parse_token_num_t cmd_fa_policer_config_bulk_policer_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ policer_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_green_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ green_string, "G");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_g_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ g_action, "R#Y#G#D");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_yellow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ yellow_string, "Y");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_y_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ y_action, "R#Y#G#D");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_red_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ red_string, "R");
+
+cmdline_parse_token_string_t cmd_fa_policer_config_bulk_r_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_config_bulk_result,
+ r_action, "R#Y#G#D");
+
+cmdline_parse_inst_t cmd_fa_policer_config_bulk = {
+ .f = cmd_fa_policer_config_bulk_parsed,
+ .data = NULL,
+ .help_str = "Flow policer configuration (multiple flows)",
+ .tokens = {
+ (void *) &cmd_fa_policer_config_bulk_p_string,
+ (void *) &cmd_fa_policer_config_bulk_pipeline_id,
+ (void *) &cmd_fa_policer_config_bulk_flows_string,
+ (void *) &cmd_fa_policer_config_bulk_n_flows,
+ (void *) &cmd_fa_policer_config_bulk_policer_string,
+ (void *) &cmd_fa_policer_config_bulk_policer_id,
+ (void *) &cmd_fa_policer_config_bulk_green_string,
+ (void *) &cmd_fa_policer_config_bulk_g_action,
+ (void *) &cmd_fa_policer_config_bulk_yellow_string,
+ (void *) &cmd_fa_policer_config_bulk_y_action,
+ (void *) &cmd_fa_policer_config_bulk_red_string,
+ (void *) &cmd_fa_policer_config_bulk_r_action,
+ NULL,
+ },
+};
+
+/*
+ * Flow output port configuration (single flow)
+ *
+ * p <pipeline ID> flow <flow ID> port <port ID>
+ */
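+/*
+ * Example (illustrative values): "p 1 flow 10 port 2" sends all packets
+ * of flow 10 in pipeline 1 to output port 2.
+ */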
+
+struct cmd_fa_output_port_config_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ uint32_t flow_id;
+ cmdline_fixed_string_t port_string;
+ uint32_t port_id;
+};
+
+static void
+cmd_fa_output_port_config_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_output_port_config_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params flow_params;
+ int status;
+
+ flow_params.port_id = params->port_id;
+
+ status = app_pipeline_fa_flow_config(app,
+ params->pipeline_id,
+ params->flow_id,
+ 0,
+ 0,
+ 1,
+ &flow_params);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_result,
+ flow_string, "flow");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result,
+ flow_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_result,
+ port_string, "port");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_result,
+ port_id, UINT32);
+
+cmdline_parse_inst_t cmd_fa_output_port_config = {
+ .f = cmd_fa_output_port_config_parsed,
+ .data = NULL,
+ .help_str = "Flow output port configuration (single flow)",
+ .tokens = {
+ (void *) &cmd_fa_output_port_config_p_string,
+ (void *) &cmd_fa_output_port_config_pipeline_id,
+ (void *) &cmd_fa_output_port_config_flow_string,
+ (void *) &cmd_fa_output_port_config_flow_id,
+ (void *) &cmd_fa_output_port_config_port_string,
+ (void *) &cmd_fa_output_port_config_port_id,
+ NULL,
+ },
+};
+
+/*
+ * Flow output port configuration (multiple flows)
+ *
+ * p <pipeline ID> flows <n_flows> ports <n_ports>
+ */
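+/*
+ * Example (illustrative values): "p 1 flows 4096 ports 4" assigns flows
+ * 0 .. 4095 of pipeline 1 to output ports 0 .. 3 in round-robin order
+ * (flow i goes to port i % 4).
+ */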
+
+struct cmd_fa_output_port_config_bulk_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flows_string;
+ uint32_t n_flows;
+ cmdline_fixed_string_t ports_string;
+ uint32_t n_ports;
+};
+
+static void
+cmd_fa_output_port_config_bulk_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_output_port_config_bulk_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_flow_params *flow_params;
+ uint32_t *flow_id;
+ uint32_t i;
+
+	if ((params->n_flows == 0) || (params->n_ports == 0)) {
+		printf("Invalid arguments\n");
+		return;
+	}
+
+ flow_id = (uint32_t *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_params = (struct pipeline_fa_flow_params *) rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(struct pipeline_fa_flow_params),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_params == NULL) {
+ rte_free(flow_id);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ for (i = 0; i < params->n_flows; i++) {
+ uint32_t pos = i % N_FLOWS_BULK;
+ uint32_t port_id = i % params->n_ports;
+
+ flow_id[pos] = i;
+
+ memset(&flow_params[pos], 0, sizeof(flow_params[pos]));
+ flow_params[pos].port_id = port_id;
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (i == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fa_flow_config_bulk(app,
+ params->pipeline_id,
+ flow_id,
+ pos + 1,
+ 0,
+ 0,
+ 1,
+ flow_params);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ rte_free(flow_params);
+ rte_free(flow_id);
+}
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_flows_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ flows_string, "flows");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ n_flows, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_output_port_config_bulk_ports_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ ports_string, "ports");
+
+cmdline_parse_token_num_t cmd_fa_output_port_config_bulk_n_ports =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_output_port_config_bulk_result,
+ n_ports, UINT32);
+
+cmdline_parse_inst_t cmd_fa_output_port_config_bulk = {
+ .f = cmd_fa_output_port_config_bulk_parsed,
+ .data = NULL,
+ .help_str = "Flow output port configuration (multiple flows)",
+ .tokens = {
+ (void *) &cmd_fa_output_port_config_bulk_p_string,
+ (void *) &cmd_fa_output_port_config_bulk_pipeline_id,
+ (void *) &cmd_fa_output_port_config_bulk_flows_string,
+ (void *) &cmd_fa_output_port_config_bulk_n_flows,
+ (void *) &cmd_fa_output_port_config_bulk_ports_string,
+ (void *) &cmd_fa_output_port_config_bulk_n_ports,
+ NULL,
+ },
+};
+
+/*
+ * Flow DiffServ Code Point (DSCP) translation table configuration
+ *
+ * p <pipeline ID> dscp <DSCP ID> class <traffic class ID> color <color>
+ *
+ * <color> = G (green) | Y (yellow) | R (red)
+ */
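+/*
+ * Example (illustrative values): "p 1 dscp 46 class 0 color G" maps
+ * packets marked with DSCP 46 (EF) to traffic class 0 with green input
+ * color.
+ */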
+
+struct cmd_fa_dscp_config_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t dscp_string;
+ uint32_t dscp_id;
+ cmdline_fixed_string_t class_string;
+ uint32_t traffic_class_id;
+ cmdline_fixed_string_t color_string;
+	cmdline_fixed_string_t color;
+};
+
+static void
+cmd_fa_dscp_config_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_dscp_config_result *params = parsed_result;
+ struct app_params *app = data;
+ enum rte_meter_color color;
+ int status;
+
+ status = string_to_color(params->color, &color);
+ if (status) {
+ printf("Invalid color\n");
+ return;
+ }
+
+ status = app_pipeline_fa_dscp_config(app,
+ params->pipeline_id,
+ params->dscp_id,
+ params->traffic_class_id,
+ color);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fa_dscp_config_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_dscp_config_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_dscp_config_dscp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result,
+ dscp_string, "dscp");
+
+cmdline_parse_token_num_t cmd_fa_dscp_config_dscp_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result,
+ dscp_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_dscp_config_class_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result,
+ class_string, "class");
+
+cmdline_parse_token_num_t cmd_fa_dscp_config_traffic_class_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_config_result,
+ traffic_class_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_dscp_config_color_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result,
+ color_string, "color");
+
+cmdline_parse_token_string_t cmd_fa_dscp_config_color =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_config_result,
+ color, "G#Y#R");
+
+cmdline_parse_inst_t cmd_fa_dscp_config = {
+ .f = cmd_fa_dscp_config_parsed,
+ .data = NULL,
+ .help_str = "Flow DSCP translation table configuration",
+ .tokens = {
+ (void *) &cmd_fa_dscp_config_p_string,
+ (void *) &cmd_fa_dscp_config_pipeline_id,
+ (void *) &cmd_fa_dscp_config_dscp_string,
+ (void *) &cmd_fa_dscp_config_dscp_id,
+ (void *) &cmd_fa_dscp_config_class_string,
+ (void *) &cmd_fa_dscp_config_traffic_class_id,
+ (void *) &cmd_fa_dscp_config_color_string,
+ (void *) &cmd_fa_dscp_config_color,
+ NULL,
+ },
+};
+
+/*
+ * Flow policer stats read
+ *
+ * p <pipeline ID> flow <flow ID> policer <policer ID> stats
+ */
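+/*
+ * Example (illustrative values): "p 1 flow 10 policer 0 stats" prints the
+ * per-color packet counters of policer 0 of flow 10; the counters are
+ * cleared after being read (the request is issued with clear = 1).
+ */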
+
+struct cmd_fa_policer_stats_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ uint32_t flow_id;
+ cmdline_fixed_string_t policer_string;
+ uint32_t policer_id;
+ cmdline_fixed_string_t stats_string;
+};
+
+static void
+cmd_fa_policer_stats_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_policer_stats_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fa_policer_stats stats;
+ int status;
+
+ status = app_pipeline_fa_flow_policer_stats_read(app,
+ params->pipeline_id,
+ params->flow_id,
+ params->policer_id,
+ 1,
+ &stats);
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ /* Display stats */
+ printf("\tPkts G: %" PRIu64
+ "\tPkts Y: %" PRIu64
+ "\tPkts R: %" PRIu64
+ "\tPkts D: %" PRIu64 "\n",
+ stats.n_pkts[e_RTE_METER_GREEN],
+ stats.n_pkts[e_RTE_METER_YELLOW],
+ stats.n_pkts[e_RTE_METER_RED],
+ stats.n_pkts_drop);
+}
+
+cmdline_parse_token_string_t cmd_fa_policer_stats_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_stats_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_policer_stats_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_stats_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_stats_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_stats_result,
+ flow_string, "flow");
+
+cmdline_parse_token_num_t cmd_fa_policer_stats_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_stats_result,
+ flow_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_stats_policer_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_stats_result,
+ policer_string, "policer");
+
+cmdline_parse_token_num_t cmd_fa_policer_stats_policer_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_policer_stats_result,
+ policer_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_policer_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_policer_stats_result,
+ stats_string, "stats");
+
+cmdline_parse_inst_t cmd_fa_policer_stats = {
+ .f = cmd_fa_policer_stats_parsed,
+ .data = NULL,
+ .help_str = "Flow policer stats read",
+ .tokens = {
+ (void *) &cmd_fa_policer_stats_p_string,
+ (void *) &cmd_fa_policer_stats_pipeline_id,
+ (void *) &cmd_fa_policer_stats_flow_string,
+ (void *) &cmd_fa_policer_stats_flow_id,
+ (void *) &cmd_fa_policer_stats_policer_string,
+ (void *) &cmd_fa_policer_stats_policer_id,
+ (void *) &cmd_fa_policer_stats_string,
+ NULL,
+ },
+};
+
+/*
+ * Flow actions list
+ *
+ * p <pipeline ID> flow actions ls
+ */
+
+struct cmd_fa_flow_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t actions_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_fa_flow_ls_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_flow_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_fa_flow_ls(app, params->pipeline_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fa_flow_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_flow_ls_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_flow_ls_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_flow_ls_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fa_flow_ls_actions_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result,
+ actions_string, "actions");
+
+cmdline_parse_token_string_t cmd_fa_flow_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_flow_ls_result,
+ ls_string, "ls");
+
+cmdline_parse_inst_t cmd_fa_flow_ls = {
+ .f = cmd_fa_flow_ls_parsed,
+ .data = NULL,
+ .help_str = "Flow actions list",
+ .tokens = {
+ (void *) &cmd_fa_flow_ls_p_string,
+ (void *) &cmd_fa_flow_ls_pipeline_id,
+ (void *) &cmd_fa_flow_ls_flow_string,
+ (void *) &cmd_fa_flow_ls_actions_string,
+ (void *) &cmd_fa_flow_ls_ls_string,
+ NULL,
+ },
+};
+
+/*
+ * Flow DiffServ Code Point (DSCP) translation table list
+ *
+ * p <pipeline ID> dscp ls
+ */
+
+struct cmd_fa_dscp_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t dscp_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_fa_dscp_ls_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fa_dscp_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_fa_dscp_ls(app, params->pipeline_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fa_dscp_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fa_dscp_ls_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fa_dscp_ls_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fa_dscp_ls_dscp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result,
+ dscp_string, "dscp");
+
+cmdline_parse_token_string_t cmd_fa_dscp_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fa_dscp_ls_result, ls_string,
+ "ls");
+
+cmdline_parse_inst_t cmd_fa_dscp_ls = {
+ .f = cmd_fa_dscp_ls_parsed,
+ .data = NULL,
+	.help_str = "Flow DSCP translation table list",
+ .tokens = {
+ (void *) &cmd_fa_dscp_ls_p_string,
+ (void *) &cmd_fa_dscp_ls_pipeline_id,
+ (void *) &cmd_fa_dscp_ls_dscp_string,
+ (void *) &cmd_fa_dscp_ls_string,
+ NULL,
+ },
+};
+
+static cmdline_parse_ctx_t pipeline_cmds[] = {
+ (cmdline_parse_inst_t *) &cmd_fa_meter_config,
+ (cmdline_parse_inst_t *) &cmd_fa_meter_config_bulk,
+ (cmdline_parse_inst_t *) &cmd_fa_policer_config,
+ (cmdline_parse_inst_t *) &cmd_fa_policer_config_bulk,
+ (cmdline_parse_inst_t *) &cmd_fa_output_port_config,
+ (cmdline_parse_inst_t *) &cmd_fa_output_port_config_bulk,
+ (cmdline_parse_inst_t *) &cmd_fa_dscp_config,
+ (cmdline_parse_inst_t *) &cmd_fa_policer_stats,
+ (cmdline_parse_inst_t *) &cmd_fa_flow_ls,
+ (cmdline_parse_inst_t *) &cmd_fa_dscp_ls,
+ NULL,
+};
+
+static struct pipeline_fe_ops pipeline_flow_actions_fe_ops = {
+ .f_init = app_pipeline_fa_init,
+ .f_free = app_pipeline_fa_free,
+ .cmds = pipeline_cmds,
+};
+
+struct pipeline_type pipeline_flow_actions = {
+ .name = "FLOW_ACTIONS",
+ .be_ops = &pipeline_flow_actions_be_ops,
+ .fe_ops = &pipeline_flow_actions_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions.h b/examples/ip_pipeline/pipeline/pipeline_flow_actions.h
new file mode 100644
index 00000000..f2cd0cbb
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions.h
@@ -0,0 +1,78 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FLOW_ACTIONS_H__
+#define __INCLUDE_PIPELINE_FLOW_ACTIONS_H__
+
+#include <rte_meter.h>
+
+#include "pipeline.h"
+#include "pipeline_flow_actions_be.h"
+
+int
+app_pipeline_fa_flow_config(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t flow_id,
+ uint32_t meter_update_mask,
+ uint32_t policer_update_mask,
+ uint32_t port_update,
+ struct pipeline_fa_flow_params *params);
+
+int
+app_pipeline_fa_flow_config_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t *flow_id,
+ uint32_t n_flows,
+ uint32_t meter_update_mask,
+ uint32_t policer_update_mask,
+ uint32_t port_update,
+ struct pipeline_fa_flow_params *params);
+
+int
+app_pipeline_fa_dscp_config(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t dscp,
+ uint32_t traffic_class,
+ enum rte_meter_color color);
+
+int
+app_pipeline_fa_flow_policer_stats_read(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t flow_id,
+ uint32_t policer_id,
+ int clear,
+ struct pipeline_fa_policer_stats *stats);
+
+extern struct pipeline_type pipeline_flow_actions;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c
new file mode 100644
index 00000000..3ad3ee63
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.c
@@ -0,0 +1,1011 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_table_array.h>
+#include <rte_byteorder.h>
+#include <rte_ip.h>
+
+#include "pipeline_actions_common.h"
+#include "pipeline_flow_actions_be.h"
+#include "parser.h"
+#include "hash_func.h"
+
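+/*
+ * Initialize a flow parameter block with safe defaults: a minimal non-zero
+ * trTCM configuration for every traffic class, a pass-through policer
+ * (colors preserved, nothing dropped) and output port 0.
+ */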
+int
+pipeline_fa_flow_params_set_default(struct pipeline_fa_flow_params *params)
+{
+ uint32_t i;
+
+ if (params == NULL)
+ return -1;
+
+ for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) {
+ struct rte_meter_trtcm_params *m = &params->m[i];
+
+ m->cir = 1;
+ m->cbs = 1;
+ m->pir = 1;
+ m->pbs = 2;
+ }
+
+ for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) {
+ struct pipeline_fa_policer_params *p = &params->p[i];
+ uint32_t j;
+
+ for (j = 0; j < e_RTE_METER_COLORS; j++) {
+ struct pipeline_fa_policer_action *a = &p->action[j];
+
+ a->drop = 0;
+ a->color = (enum rte_meter_color) j;
+ }
+ }
+
+ params->port_id = 0;
+
+ return 0;
+}
+
+struct dscp_entry {
+ uint32_t traffic_class;
+ enum rte_meter_color color;
+};
+
+struct pipeline_flow_actions {
+ struct pipeline p;
+ struct pipeline_fa_params params;
+ pipeline_msg_req_handler custom_handlers[PIPELINE_FA_MSG_REQS];
+
+ struct dscp_entry dscp[PIPELINE_FA_N_DSCP];
+} __rte_cache_aligned;
+
+static void *
+pipeline_fa_msg_req_custom_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler handlers[] = {
+ [PIPELINE_MSG_REQ_PING] =
+ pipeline_msg_req_ping_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_IN] =
+ pipeline_msg_req_stats_port_in_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_OUT] =
+ pipeline_msg_req_stats_port_out_handler,
+ [PIPELINE_MSG_REQ_STATS_TABLE] =
+ pipeline_msg_req_stats_table_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_ENABLE] =
+ pipeline_msg_req_port_in_enable_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_DISABLE] =
+ pipeline_msg_req_port_in_disable_handler,
+ [PIPELINE_MSG_REQ_CUSTOM] =
+ pipeline_fa_msg_req_custom_handler,
+};
+
+static void *
+pipeline_fa_msg_req_flow_config_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fa_msg_req_flow_config_bulk_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fa_msg_req_dscp_config_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fa_msg_req_policer_stats_read_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler custom_handlers[] = {
+ [PIPELINE_FA_MSG_REQ_FLOW_CONFIG] =
+ pipeline_fa_msg_req_flow_config_handler,
+ [PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK] =
+ pipeline_fa_msg_req_flow_config_bulk_handler,
+ [PIPELINE_FA_MSG_REQ_DSCP_CONFIG] =
+ pipeline_fa_msg_req_dscp_config_handler,
+ [PIPELINE_FA_MSG_REQ_POLICER_STATS_READ] =
+ pipeline_fa_msg_req_policer_stats_read_handler,
+};
+
+/*
+ * Flow table
+ */
+struct meter_policer {
+ struct rte_meter_trtcm meter;
+ struct pipeline_fa_policer_params policer;
+ struct pipeline_fa_policer_stats stats;
+};
+
+struct flow_table_entry {
+ struct rte_pipeline_table_entry head;
+ struct meter_policer mp[PIPELINE_FA_N_TC_MAX];
+};
+
+static int
+flow_table_entry_set_meter(struct flow_table_entry *entry,
+ uint32_t meter_id,
+ struct pipeline_fa_flow_params *params)
+{
+ struct rte_meter_trtcm *meter = &entry->mp[meter_id].meter;
+ struct rte_meter_trtcm_params *meter_params = &params->m[meter_id];
+
+ return rte_meter_trtcm_config(meter, meter_params);
+}
+
+static void
+flow_table_entry_set_policer(struct flow_table_entry *entry,
+ uint32_t policer_id,
+ struct pipeline_fa_flow_params *params)
+{
+ struct pipeline_fa_policer_params *p0 = &entry->mp[policer_id].policer;
+ struct pipeline_fa_policer_params *p1 = &params->p[policer_id];
+
+ memcpy(p0, p1, sizeof(*p0));
+}
+
+static void
+flow_table_entry_set_port_id(struct pipeline_flow_actions *p,
+ struct flow_table_entry *entry,
+ struct pipeline_fa_flow_params *params)
+{
+ entry->head.action = RTE_PIPELINE_ACTION_PORT;
+ entry->head.port_id = p->p.port_out_id[params->port_id];
+}
+
+static int
+flow_table_entry_set_default(struct pipeline_flow_actions *p,
+ struct flow_table_entry *entry)
+{
+ struct pipeline_fa_flow_params params;
+ uint32_t i;
+
+ pipeline_fa_flow_params_set_default(&params);
+
+ memset(entry, 0, sizeof(*entry));
+
+ flow_table_entry_set_port_id(p, entry, &params);
+
+ for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++) {
+ int status;
+
+ status = flow_table_entry_set_meter(entry, i, &params);
+ if (status)
+ return status;
+ }
+
+ for (i = 0; i < PIPELINE_FA_N_TC_MAX; i++)
+ flow_table_entry_set_policer(entry, i, &params);
+
+ return 0;
+}
+
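+/*
+ * Per-packet work: read the DSCP from the IP header TOS field (top 6 bits),
+ * translate it to a traffic class and an input color via the DSCP table,
+ * run the color-aware trTCM meter for that class, apply the policer action
+ * (re-color or drop), update the per-flow per-class statistics and return
+ * the drop decision.
+ */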
+static inline uint64_t
+pkt_work(
+ struct rte_mbuf *pkt,
+ struct rte_pipeline_table_entry *table_entry,
+ void *arg,
+ uint64_t time)
+{
+ struct pipeline_flow_actions *p = arg;
+ struct flow_table_entry *entry =
+ (struct flow_table_entry *) table_entry;
+
+ struct ipv4_hdr *pkt_ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, p->params.ip_hdr_offset);
+ enum rte_meter_color *pkt_color = (enum rte_meter_color *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, p->params.color_offset);
+
+ /* Read (IP header) */
+ uint32_t total_length = rte_bswap16(pkt_ip->total_length);
+ uint32_t dscp = pkt_ip->type_of_service >> 2;
+
+ uint32_t tc = p->dscp[dscp].traffic_class;
+ enum rte_meter_color color = p->dscp[dscp].color;
+
+ struct rte_meter_trtcm *meter = &entry->mp[tc].meter;
+ struct pipeline_fa_policer_params *policer = &entry->mp[tc].policer;
+ struct pipeline_fa_policer_stats *stats = &entry->mp[tc].stats;
+
+ /* Read (entry), compute */
+ enum rte_meter_color color2 = rte_meter_trtcm_color_aware_check(meter,
+ time,
+ total_length,
+ color);
+
+ enum rte_meter_color color3 = policer->action[color2].color;
+ uint64_t drop = policer->action[color2].drop;
+
+ /* Read (entry), write (entry, color) */
+ stats->n_pkts[color3] += drop ^ 1LLU;
+ stats->n_pkts_drop += drop;
+ *pkt_color = color3;
+
+ return drop;
+}
+
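+/*
+ * Four-packet variant of pkt_work, used to process bursts of 4 table hits
+ * at a time; the logic is identical, only unrolled.
+ */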
+static inline uint64_t
+pkt4_work(
+ struct rte_mbuf **pkts,
+ struct rte_pipeline_table_entry **table_entries,
+ void *arg,
+ uint64_t time)
+{
+ struct pipeline_flow_actions *p = arg;
+
+ struct flow_table_entry *entry0 =
+ (struct flow_table_entry *) table_entries[0];
+ struct flow_table_entry *entry1 =
+ (struct flow_table_entry *) table_entries[1];
+ struct flow_table_entry *entry2 =
+ (struct flow_table_entry *) table_entries[2];
+ struct flow_table_entry *entry3 =
+ (struct flow_table_entry *) table_entries[3];
+
+ struct ipv4_hdr *pkt0_ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p->params.ip_hdr_offset);
+ struct ipv4_hdr *pkt1_ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p->params.ip_hdr_offset);
+ struct ipv4_hdr *pkt2_ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p->params.ip_hdr_offset);
+ struct ipv4_hdr *pkt3_ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p->params.ip_hdr_offset);
+
+ enum rte_meter_color *pkt0_color = (enum rte_meter_color *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p->params.color_offset);
+ enum rte_meter_color *pkt1_color = (enum rte_meter_color *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p->params.color_offset);
+ enum rte_meter_color *pkt2_color = (enum rte_meter_color *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p->params.color_offset);
+ enum rte_meter_color *pkt3_color = (enum rte_meter_color *)
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p->params.color_offset);
+
+ /* Read (IP header) */
+ uint32_t total_length0 = rte_bswap16(pkt0_ip->total_length);
+ uint32_t dscp0 = pkt0_ip->type_of_service >> 2;
+
+ uint32_t total_length1 = rte_bswap16(pkt1_ip->total_length);
+ uint32_t dscp1 = pkt1_ip->type_of_service >> 2;
+
+ uint32_t total_length2 = rte_bswap16(pkt2_ip->total_length);
+ uint32_t dscp2 = pkt2_ip->type_of_service >> 2;
+
+ uint32_t total_length3 = rte_bswap16(pkt3_ip->total_length);
+ uint32_t dscp3 = pkt3_ip->type_of_service >> 2;
+
+ uint32_t tc0 = p->dscp[dscp0].traffic_class;
+ enum rte_meter_color color0 = p->dscp[dscp0].color;
+
+ uint32_t tc1 = p->dscp[dscp1].traffic_class;
+ enum rte_meter_color color1 = p->dscp[dscp1].color;
+
+ uint32_t tc2 = p->dscp[dscp2].traffic_class;
+ enum rte_meter_color color2 = p->dscp[dscp2].color;
+
+ uint32_t tc3 = p->dscp[dscp3].traffic_class;
+ enum rte_meter_color color3 = p->dscp[dscp3].color;
+
+ struct rte_meter_trtcm *meter0 = &entry0->mp[tc0].meter;
+ struct pipeline_fa_policer_params *policer0 = &entry0->mp[tc0].policer;
+ struct pipeline_fa_policer_stats *stats0 = &entry0->mp[tc0].stats;
+
+ struct rte_meter_trtcm *meter1 = &entry1->mp[tc1].meter;
+ struct pipeline_fa_policer_params *policer1 = &entry1->mp[tc1].policer;
+ struct pipeline_fa_policer_stats *stats1 = &entry1->mp[tc1].stats;
+
+ struct rte_meter_trtcm *meter2 = &entry2->mp[tc2].meter;
+ struct pipeline_fa_policer_params *policer2 = &entry2->mp[tc2].policer;
+ struct pipeline_fa_policer_stats *stats2 = &entry2->mp[tc2].stats;
+
+ struct rte_meter_trtcm *meter3 = &entry3->mp[tc3].meter;
+ struct pipeline_fa_policer_params *policer3 = &entry3->mp[tc3].policer;
+ struct pipeline_fa_policer_stats *stats3 = &entry3->mp[tc3].stats;
+
+ /* Read (entry), compute, write (entry) */
+ enum rte_meter_color color2_0 = rte_meter_trtcm_color_aware_check(
+ meter0,
+ time,
+ total_length0,
+ color0);
+
+ enum rte_meter_color color2_1 = rte_meter_trtcm_color_aware_check(
+ meter1,
+ time,
+ total_length1,
+ color1);
+
+ enum rte_meter_color color2_2 = rte_meter_trtcm_color_aware_check(
+ meter2,
+ time,
+ total_length2,
+ color2);
+
+ enum rte_meter_color color2_3 = rte_meter_trtcm_color_aware_check(
+ meter3,
+ time,
+ total_length3,
+ color3);
+
+ enum rte_meter_color color3_0 = policer0->action[color2_0].color;
+ enum rte_meter_color color3_1 = policer1->action[color2_1].color;
+ enum rte_meter_color color3_2 = policer2->action[color2_2].color;
+ enum rte_meter_color color3_3 = policer3->action[color2_3].color;
+
+ uint64_t drop0 = policer0->action[color2_0].drop;
+ uint64_t drop1 = policer1->action[color2_1].drop;
+ uint64_t drop2 = policer2->action[color2_2].drop;
+ uint64_t drop3 = policer3->action[color2_3].drop;
+
+ /* Read (entry), write (entry, color) */
+ stats0->n_pkts[color3_0] += drop0 ^ 1LLU;
+ stats0->n_pkts_drop += drop0;
+
+ stats1->n_pkts[color3_1] += drop1 ^ 1LLU;
+ stats1->n_pkts_drop += drop1;
+
+ stats2->n_pkts[color3_2] += drop2 ^ 1LLU;
+ stats2->n_pkts_drop += drop2;
+
+ stats3->n_pkts[color3_3] += drop3 ^ 1LLU;
+ stats3->n_pkts_drop += drop3;
+
+ *pkt0_color = color3_0;
+ *pkt1_color = color3_1;
+ *pkt2_color = color3_2;
+ *pkt3_color = color3_3;
+
+ return drop0 | (drop1 << 1) | (drop2 << 2) | (drop3 << 3);
+}
+
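+/*
+ * Generate the table action handler fa_table_ah_hit: for each burst of
+ * packets hitting the flow table it invokes pkt4_work/pkt_work with the
+ * current time and drops the packets flagged for dropping by the work
+ * functions (see PIPELINE_TABLE_AH_HIT_DROP_TIME in
+ * pipeline_actions_common.h).
+ */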
+PIPELINE_TABLE_AH_HIT_DROP_TIME(fa_table_ah_hit, pkt_work, pkt4_work);
+
+static rte_pipeline_table_action_handler_hit
+get_fa_table_ah_hit(__rte_unused struct pipeline_flow_actions *p)
+{
+ return fa_table_ah_hit;
+}
+
+/*
+ * Argument parsing
+ */
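+/*
+ * The arguments below come from the pipeline section of the application
+ * configuration file. Illustrative excerpt of such a section (the values,
+ * in particular the offsets, are examples only and must match the packet
+ * metadata layout used by the surrounding pipelines):
+ *
+ *   [PIPELINE1]
+ *   type = FLOW_ACTIONS
+ *   n_flows = 65536
+ *   n_meters_per_flow = 4
+ *   flow_id_offset = 64
+ *   ip_hdr_offset = 142
+ *   color_offset = 68
+ */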
+int
+pipeline_fa_parse_args(struct pipeline_fa_params *p,
+ struct pipeline_params *params)
+{
+ uint32_t n_flows_present = 0;
+ uint32_t n_meters_per_flow_present = 0;
+ uint32_t flow_id_offset_present = 0;
+ uint32_t ip_hdr_offset_present = 0;
+ uint32_t color_offset_present = 0;
+ uint32_t i;
+
+ /* Default values */
+ p->n_meters_per_flow = 1;
+ p->dscp_enabled = 0;
+
+ for (i = 0; i < params->n_args; i++) {
+ char *arg_name = params->args_name[i];
+ char *arg_value = params->args_value[i];
+
+ /* n_flows */
+ if (strcmp(arg_name, "n_flows") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_flows_present == 0, params->name,
+ arg_name);
+ n_flows_present = 1;
+
+ status = parser_read_uint32(&p->n_flows,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->n_flows != 0)), params->name,
+ arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* n_meters_per_flow */
+ if (strcmp(arg_name, "n_meters_per_flow") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_meters_per_flow_present == 0,
+ params->name, arg_name);
+ n_meters_per_flow_present = 1;
+
+ status = parser_read_uint32(&p->n_meters_per_flow,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->n_meters_per_flow != 0)),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) &&
+ (p->n_meters_per_flow <=
+ PIPELINE_FA_N_TC_MAX)), params->name,
+ arg_name, arg_value);
+
+ continue;
+ }
+
+ /* flow_id_offset */
+ if (strcmp(arg_name, "flow_id_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ flow_id_offset_present == 0,
+ params->name, arg_name);
+ flow_id_offset_present = 1;
+
+ status = parser_read_uint32(&p->flow_id_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* ip_hdr_offset */
+ if (strcmp(arg_name, "ip_hdr_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ ip_hdr_offset_present == 0,
+ params->name, arg_name);
+ ip_hdr_offset_present = 1;
+
+ status = parser_read_uint32(&p->ip_hdr_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* color_offset */
+ if (strcmp(arg_name, "color_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ color_offset_present == 0, params->name,
+ arg_name);
+ color_offset_present = 1;
+
+ status = parser_read_uint32(&p->color_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ p->dscp_enabled = 1;
+
+ continue;
+ }
+
+ /* Unknown argument */
+ PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name);
+ }
+
+ /* Check that mandatory arguments are present */
+ PIPELINE_PARSE_ERR_MANDATORY((n_flows_present), params->name,
+ "n_flows");
+ PIPELINE_PARSE_ERR_MANDATORY((flow_id_offset_present),
+ params->name, "flow_id_offset");
+ PIPELINE_PARSE_ERR_MANDATORY((ip_hdr_offset_present),
+ params->name, "ip_hdr_offset");
+ PIPELINE_PARSE_ERR_MANDATORY((color_offset_present), params->name,
+ "color_offset");
+
+ return 0;
+}
+
+static void
+dscp_init(struct pipeline_flow_actions *p)
+{
+ uint32_t i;
+
+ for (i = 0; i < PIPELINE_FA_N_DSCP; i++) {
+ p->dscp[i].traffic_class = 0;
+ p->dscp[i].color = e_RTE_METER_GREEN;
+ }
+}
+
+static void *pipeline_fa_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline *p;
+ struct pipeline_flow_actions *p_fa;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if (params == NULL)
+ return NULL;
+
+ if (params->n_ports_in != params->n_ports_out)
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(
+ sizeof(struct pipeline_flow_actions));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+ p_fa = (struct pipeline_flow_actions *) p;
+
+ strcpy(p->name, params->name);
+ p->log_level = params->log_level;
+
+ PLOG(p, HIGH, "Flow actions");
+
+ /* Parse arguments */
+	if (pipeline_fa_parse_args(&p_fa->params, params)) {
+		rte_free(p);
+		return NULL;
+	}
+
+ dscp_init(p_fa);
+
+ /* Pipeline */
+ {
+ struct rte_pipeline_params pipeline_params = {
+ .name = params->name,
+ .socket_id = params->socket_id,
+ .offset_port_id = 0,
+ };
+
+ p->p = rte_pipeline_create(&pipeline_params);
+ if (p->p == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Input ports */
+ p->n_ports_in = params->n_ports_in;
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_port_in_params port_params = {
+ .ops = pipeline_port_in_params_get_ops(
+ &params->port_in[i]),
+ .arg_create = pipeline_port_in_params_convert(
+ &params->port_in[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ .burst_size = params->port_in[i].burst_size,
+ };
+
+ int status = rte_pipeline_port_in_create(p->p,
+ &port_params,
+ &p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Output ports */
+ p->n_ports_out = params->n_ports_out;
+ for (i = 0; i < p->n_ports_out; i++) {
+ struct rte_pipeline_port_out_params port_params = {
+ .ops = pipeline_port_out_params_get_ops(
+ &params->port_out[i]),
+ .arg_create = pipeline_port_out_params_convert(
+ &params->port_out[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ };
+
+ int status = rte_pipeline_port_out_create(p->p,
+ &port_params,
+ &p->port_out_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Tables */
+ p->n_tables = 1;
+ {
+ struct rte_table_array_params table_array_params = {
+ .n_entries = p_fa->params.n_flows,
+ .offset = p_fa->params.flow_id_offset,
+ };
+
+ struct rte_pipeline_table_params table_params = {
+ .ops = &rte_table_array_ops,
+ .arg_create = &table_array_params,
+ .f_action_hit = get_fa_table_ah_hit(p_fa),
+ .f_action_miss = NULL,
+ .arg_ah = p_fa,
+ .action_data_size =
+ sizeof(struct flow_table_entry) -
+ sizeof(struct rte_pipeline_table_entry),
+ };
+
+ int status;
+
+ status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Connecting input ports to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_connect_to_table(p->p,
+ p->port_in_id[i],
+ p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Enable input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_enable(p->p,
+ p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Initialize table entries */
+ for (i = 0; i < p_fa->params.n_flows; i++) {
+ struct rte_table_array_key key = {
+ .pos = i,
+ };
+
+ struct flow_table_entry entry;
+ struct rte_pipeline_table_entry *entry_ptr;
+ int key_found, status;
+
+ flow_table_entry_set_default(p_fa, &entry);
+
+ status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &key,
+ (struct rte_pipeline_table_entry *) &entry,
+ &key_found,
+ &entry_ptr);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Check pipeline consistency */
+ if (rte_pipeline_check(p->p) < 0) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Message queues */
+ p->n_msgq = params->n_msgq;
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_in[i] = params->msgq_in[i];
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_out[i] = params->msgq_out[i];
+
+ /* Message handlers */
+ memcpy(p->handlers, handlers, sizeof(p->handlers));
+ memcpy(p_fa->custom_handlers,
+ custom_handlers,
+ sizeof(p_fa->custom_handlers));
+
+ return p;
+}
+
+static int
+pipeline_fa_free(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return 0;
+}
+
+static int
+pipeline_fa_track(void *pipeline,
+ __rte_unused uint32_t port_in,
+ uint32_t *port_out)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if ((p == NULL) ||
+ (port_in >= p->n_ports_in) ||
+ (port_out == NULL))
+ return -1;
+
+ if (p->n_ports_in == 1) {
+ *port_out = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int
+pipeline_fa_timer(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ pipeline_msg_req_handle(p);
+ rte_pipeline_flush(p->p);
+
+ return 0;
+}
+
+void *
+pipeline_fa_msg_req_custom_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_flow_actions *p_fa =
+ (struct pipeline_flow_actions *) p;
+ struct pipeline_custom_msg_req *req = msg;
+ pipeline_msg_req_handler f_handle;
+
+ f_handle = (req->subtype < PIPELINE_FA_MSG_REQS) ?
+ p_fa->custom_handlers[req->subtype] :
+ pipeline_msg_req_invalid_handler;
+
+ if (f_handle == NULL)
+ f_handle = pipeline_msg_req_invalid_handler;
+
+ return f_handle(p, req);
+}
+
+void *
+pipeline_fa_msg_req_flow_config_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p;
+ struct pipeline_fa_flow_config_msg_req *req = msg;
+ struct pipeline_fa_flow_config_msg_rsp *rsp = msg;
+ struct flow_table_entry *entry;
+ uint32_t mask, i;
+
+ /* Set flow table entry to default if not configured before */
+ if (req->entry_ptr == NULL) {
+ struct rte_table_array_key key = {
+ .pos = req->flow_id % p_fa->params.n_flows,
+ };
+
+ struct flow_table_entry default_entry;
+
+ int key_found, status;
+
+ flow_table_entry_set_default(p_fa, &default_entry);
+
+ status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &key,
+ (struct rte_pipeline_table_entry *) &default_entry,
+ &key_found,
+ (struct rte_pipeline_table_entry **) &entry);
+ if (status) {
+ rsp->status = -1;
+ return rsp;
+ }
+ } else
+ entry = (struct flow_table_entry *) req->entry_ptr;
+
+ /* Meter */
+ for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) {
+ int status;
+
+ if ((mask & req->meter_update_mask) == 0)
+ continue;
+
+ status = flow_table_entry_set_meter(entry, i, &req->params);
+ if (status) {
+ rsp->status = -1;
+ return rsp;
+ }
+ }
+
+ /* Policer */
+ for (i = 0, mask = 1; i < PIPELINE_FA_N_TC_MAX; i++, mask <<= 1) {
+ if ((mask & req->policer_update_mask) == 0)
+ continue;
+
+ flow_table_entry_set_policer(entry, i, &req->params);
+ }
+
+ /* Port */
+ if (req->port_update)
+ flow_table_entry_set_port_id(p_fa, entry, &req->params);
+
+ /* Response */
+ rsp->status = 0;
+ rsp->entry_ptr = (void *) entry;
+ return rsp;
+}
+
+void *
+pipeline_fa_msg_req_flow_config_bulk_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p;
+ struct pipeline_fa_flow_config_bulk_msg_req *req = msg;
+ struct pipeline_fa_flow_config_bulk_msg_rsp *rsp = msg;
+ uint32_t i;
+
+ for (i = 0; i < req->n_flows; i++) {
+ struct flow_table_entry *entry;
+ uint32_t j, mask;
+
+ /* Set flow table entry to default if not configured before */
+ if (req->entry_ptr[i] == NULL) {
+ struct rte_table_array_key key = {
+ .pos = req->flow_id[i] % p_fa->params.n_flows,
+ };
+
+ struct flow_table_entry entry_to_add;
+
+ int key_found, status;
+
+ flow_table_entry_set_default(p_fa, &entry_to_add);
+
+ status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &key,
+ (struct rte_pipeline_table_entry *) &entry_to_add,
+ &key_found,
+ (struct rte_pipeline_table_entry **) &entry);
+ if (status) {
+ rsp->n_flows = i;
+ return rsp;
+ }
+
+ req->entry_ptr[i] = (void *) entry;
+ } else
+ entry = (struct flow_table_entry *) req->entry_ptr[i];
+
+ /* Meter */
+ for (j = 0, mask = 1;
+ j < PIPELINE_FA_N_TC_MAX;
+ j++, mask <<= 1) {
+ int status;
+
+ if ((mask & req->meter_update_mask) == 0)
+ continue;
+
+ status = flow_table_entry_set_meter(entry,
+ j, &req->params[i]);
+ if (status) {
+ rsp->n_flows = i;
+ return rsp;
+ }
+ }
+
+ /* Policer */
+ for (j = 0, mask = 1;
+ j < PIPELINE_FA_N_TC_MAX;
+ j++, mask <<= 1) {
+ if ((mask & req->policer_update_mask) == 0)
+ continue;
+
+ flow_table_entry_set_policer(entry,
+ j, &req->params[i]);
+ }
+
+ /* Port */
+ if (req->port_update)
+ flow_table_entry_set_port_id(p_fa,
+ entry, &req->params[i]);
+ }
+
+ /* Response */
+ rsp->n_flows = i;
+ return rsp;
+}
+
+void *
+pipeline_fa_msg_req_dscp_config_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_flow_actions *p_fa = (struct pipeline_flow_actions *) p;
+ struct pipeline_fa_dscp_config_msg_req *req = msg;
+ struct pipeline_fa_dscp_config_msg_rsp *rsp = msg;
+
+ /* Check request */
+ if ((req->dscp >= PIPELINE_FA_N_DSCP) ||
+ (req->traffic_class >= PIPELINE_FA_N_TC_MAX) ||
+ (req->color >= e_RTE_METER_COLORS)) {
+ rsp->status = -1;
+ return rsp;
+ }
+
+ p_fa->dscp[req->dscp].traffic_class = req->traffic_class;
+ p_fa->dscp[req->dscp].color = req->color;
+ rsp->status = 0;
+ return rsp;
+}
+
+void *
+pipeline_fa_msg_req_policer_stats_read_handler(__rte_unused struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_fa_policer_stats_msg_req *req = msg;
+ struct pipeline_fa_policer_stats_msg_rsp *rsp = msg;
+
+ struct flow_table_entry *entry = req->entry_ptr;
+ uint32_t policer_id = req->policer_id;
+ int clear = req->clear;
+
+ /* Check request */
+ if ((req->entry_ptr == NULL) ||
+ (req->policer_id >= PIPELINE_FA_N_TC_MAX)) {
+ rsp->status = -1;
+ return rsp;
+ }
+
+ memcpy(&rsp->stats,
+ &entry->mp[policer_id].stats,
+ sizeof(rsp->stats));
+ if (clear)
+ memset(&entry->mp[policer_id].stats,
+ 0, sizeof(entry->mp[policer_id].stats));
+ rsp->status = 0;
+ return rsp;
+}
+
+struct pipeline_be_ops pipeline_flow_actions_be_ops = {
+ .f_init = pipeline_fa_init,
+ .f_free = pipeline_fa_free,
+ .f_run = NULL,
+ .f_timer = pipeline_fa_timer,
+ .f_track = pipeline_fa_track,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h
new file mode 100644
index 00000000..456f2cca
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_actions_be.h
@@ -0,0 +1,168 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FLOW_ACTIONS_BE_H__
+#define __INCLUDE_PIPELINE_FLOW_ACTIONS_BE_H__
+
+#include <rte_meter.h>
+
+#include "pipeline_common_be.h"
+
+#ifndef PIPELINE_FA_N_TC_MAX
+#define PIPELINE_FA_N_TC_MAX 4
+#endif
+
+#define PIPELINE_FA_N_DSCP 64
+
+struct pipeline_fa_params {
+ uint32_t n_flows;
+ uint32_t n_meters_per_flow;
+ uint32_t flow_id_offset;
+ uint32_t ip_hdr_offset;
+ uint32_t color_offset;
+ uint32_t dscp_enabled;
+};
+
+int
+pipeline_fa_parse_args(struct pipeline_fa_params *p,
+ struct pipeline_params *params);
+
+struct pipeline_fa_policer_action {
+ uint32_t drop;
+ enum rte_meter_color color;
+};
+
+struct pipeline_fa_policer_params {
+ struct pipeline_fa_policer_action action[e_RTE_METER_COLORS];
+};
+
+struct pipeline_fa_flow_params {
+ struct rte_meter_trtcm_params m[PIPELINE_FA_N_TC_MAX];
+ struct pipeline_fa_policer_params p[PIPELINE_FA_N_TC_MAX];
+ uint32_t port_id;
+};
+
+int
+pipeline_fa_flow_params_set_default(struct pipeline_fa_flow_params *params);
+
+struct pipeline_fa_policer_stats {
+ uint64_t n_pkts[e_RTE_METER_COLORS];
+ uint64_t n_pkts_drop;
+};
+
+enum pipeline_fa_msg_req_type {
+ PIPELINE_FA_MSG_REQ_FLOW_CONFIG = 0,
+ PIPELINE_FA_MSG_REQ_FLOW_CONFIG_BULK,
+ PIPELINE_FA_MSG_REQ_DSCP_CONFIG,
+ PIPELINE_FA_MSG_REQ_POLICER_STATS_READ,
+ PIPELINE_FA_MSG_REQS,
+};
+
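+/*
+ * All requests below are sent to the back-end thread over the pipeline
+ * message queues; each handler writes its response over the same buffer,
+ * so every *_msg_rsp structure aliases the corresponding *_msg_req.
+ */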
+/*
+ * MSG FLOW CONFIG
+ */
+struct pipeline_fa_flow_config_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fa_msg_req_type subtype;
+
+ void *entry_ptr;
+ uint32_t flow_id;
+
+ uint32_t meter_update_mask;
+ uint32_t policer_update_mask;
+ uint32_t port_update;
+ struct pipeline_fa_flow_params params;
+};
+
+struct pipeline_fa_flow_config_msg_rsp {
+ int status;
+ void *entry_ptr;
+};
+
+/*
+ * MSG FLOW CONFIG BULK
+ */
+struct pipeline_fa_flow_config_bulk_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fa_msg_req_type subtype;
+
+ void **entry_ptr;
+ uint32_t *flow_id;
+ uint32_t n_flows;
+
+ uint32_t meter_update_mask;
+ uint32_t policer_update_mask;
+ uint32_t port_update;
+ struct pipeline_fa_flow_params *params;
+};
+
+struct pipeline_fa_flow_config_bulk_msg_rsp {
+ uint32_t n_flows;
+};
+
+/*
+ * MSG DSCP CONFIG
+ */
+struct pipeline_fa_dscp_config_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fa_msg_req_type subtype;
+
+ uint32_t dscp;
+ uint32_t traffic_class;
+ enum rte_meter_color color;
+};
+
+struct pipeline_fa_dscp_config_msg_rsp {
+ int status;
+};
+
+/*
+ * MSG POLICER STATS READ
+ */
+struct pipeline_fa_policer_stats_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fa_msg_req_type subtype;
+
+ void *entry_ptr;
+ uint32_t policer_id;
+ int clear;
+};
+
+struct pipeline_fa_policer_stats_msg_rsp {
+ int status;
+ struct pipeline_fa_policer_stats stats;
+};
+
+extern struct pipeline_be_ops pipeline_flow_actions_be_ops;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification.c b/examples/ip_pipeline/pipeline/pipeline_flow_classification.c
new file mode 100644
index 00000000..19215748
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification.c
@@ -0,0 +1,2215 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+
+#include <rte_common.h>
+#include <rte_hexdump.h>
+#include <rte_malloc.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "app.h"
+#include "pipeline_common_fe.h"
+#include "pipeline_flow_classification.h"
+#include "hash_func.h"
+
+/*
+ * Key conversion
+ */
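+/*
+ * Pack a user-supplied key into the fixed binary layouts below (8 bytes for
+ * QinQ, 16 bytes for the IPv4 5-tuple, 64 bytes for the zero-padded IPv6
+ * 5-tuple), optionally computing the 32-bit signature with the matching
+ * hash_default_key8/16/64 function.
+ */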
+
+struct pkt_key_qinq {
+ uint16_t ethertype_svlan;
+ uint16_t svlan;
+ uint16_t ethertype_cvlan;
+ uint16_t cvlan;
+} __attribute__((__packed__));
+
+struct pkt_key_ipv4_5tuple {
+ uint8_t ttl;
+ uint8_t proto;
+ uint16_t checksum;
+ uint32_t ip_src;
+ uint32_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+} __attribute__((__packed__));
+
+struct pkt_key_ipv6_5tuple {
+ uint16_t payload_length;
+ uint8_t proto;
+ uint8_t hop_limit;
+ uint8_t ip_src[16];
+ uint8_t ip_dst[16];
+ uint16_t port_src;
+ uint16_t port_dst;
+} __attribute__((__packed__));
+
+static int
+app_pipeline_fc_key_convert(struct pipeline_fc_key *key_in,
+ uint8_t *key_out,
+ uint32_t *signature)
+{
+ uint8_t buffer[PIPELINE_FC_FLOW_KEY_MAX_SIZE];
+ void *key_buffer = (key_out) ? key_out : buffer;
+
+ switch (key_in->type) {
+ case FLOW_KEY_QINQ:
+ {
+ struct pkt_key_qinq *qinq = key_buffer;
+
+ qinq->ethertype_svlan = 0;
+ qinq->svlan = rte_bswap16(key_in->key.qinq.svlan);
+ qinq->ethertype_cvlan = 0;
+ qinq->cvlan = rte_bswap16(key_in->key.qinq.cvlan);
+
+ if (signature)
+ *signature = (uint32_t) hash_default_key8(qinq, 8, 0);
+ return 0;
+ }
+
+ case FLOW_KEY_IPV4_5TUPLE:
+ {
+ struct pkt_key_ipv4_5tuple *ipv4 = key_buffer;
+
+ ipv4->ttl = 0;
+ ipv4->proto = key_in->key.ipv4_5tuple.proto;
+ ipv4->checksum = 0;
+ ipv4->ip_src = rte_bswap32(key_in->key.ipv4_5tuple.ip_src);
+ ipv4->ip_dst = rte_bswap32(key_in->key.ipv4_5tuple.ip_dst);
+ ipv4->port_src = rte_bswap16(key_in->key.ipv4_5tuple.port_src);
+ ipv4->port_dst = rte_bswap16(key_in->key.ipv4_5tuple.port_dst);
+
+ if (signature)
+ *signature = (uint32_t) hash_default_key16(ipv4, 16, 0);
+ return 0;
+ }
+
+ case FLOW_KEY_IPV6_5TUPLE:
+ {
+ struct pkt_key_ipv6_5tuple *ipv6 = key_buffer;
+
+ memset(ipv6, 0, 64);
+ ipv6->payload_length = 0;
+ ipv6->proto = key_in->key.ipv6_5tuple.proto;
+ ipv6->hop_limit = 0;
+ memcpy(&ipv6->ip_src, &key_in->key.ipv6_5tuple.ip_src, 16);
+ memcpy(&ipv6->ip_dst, &key_in->key.ipv6_5tuple.ip_dst, 16);
+ ipv6->port_src = rte_bswap16(key_in->key.ipv6_5tuple.port_src);
+ ipv6->port_dst = rte_bswap16(key_in->key.ipv6_5tuple.port_dst);
+
+ if (signature)
+ *signature = (uint32_t) hash_default_key64(ipv6, 64, 0);
+ return 0;
+ }
+
+ default:
+ return -1;
+ }
+}
+
+/*
+ * Flow classification pipeline
+ */
+
+struct app_pipeline_fc_flow {
+ struct pipeline_fc_key key;
+ uint32_t port_id;
+ uint32_t flow_id;
+ uint32_t signature;
+ void *entry_ptr;
+
+ TAILQ_ENTRY(app_pipeline_fc_flow) node;
+};
+
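+/* Number of flow list buckets; must stay a power of two, since bucket
+ * indices are computed as (signature & (N_BUCKETS - 1)). */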
+#define N_BUCKETS 65536
+
+struct app_pipeline_fc {
+ /* Parameters */
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+
+ /* Flows */
+ TAILQ_HEAD(, app_pipeline_fc_flow) flows[N_BUCKETS];
+ uint32_t n_flows;
+
+ /* Default flow */
+ uint32_t default_flow_present;
+ uint32_t default_flow_port_id;
+ void *default_flow_entry_ptr;
+};
+
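+/* Look up a flow in the local shadow copy: hash the key to pick a bucket,
+ * then match on both the signature and the full key. */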
+static struct app_pipeline_fc_flow *
+app_pipeline_fc_flow_find(struct app_pipeline_fc *p,
+ struct pipeline_fc_key *key)
+{
+ struct app_pipeline_fc_flow *f;
+ uint32_t signature, bucket_id;
+
+ app_pipeline_fc_key_convert(key, NULL, &signature);
+ bucket_id = signature & (N_BUCKETS - 1);
+
+ TAILQ_FOREACH(f, &p->flows[bucket_id], node)
+ if ((signature == f->signature) &&
+ (memcmp(key,
+ &f->key,
+ sizeof(struct pipeline_fc_key)) == 0))
+ return f;
+
+ return NULL;
+}
+
+static void *
+app_pipeline_fc_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct app_pipeline_fc *p;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct app_pipeline_fc));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+
+ /* Initialization */
+ p->n_ports_in = params->n_ports_in;
+ p->n_ports_out = params->n_ports_out;
+
+ for (i = 0; i < N_BUCKETS; i++)
+ TAILQ_INIT(&p->flows[i]);
+ p->n_flows = 0;
+
+ return (void *) p;
+}
+
+static int
+app_pipeline_fc_free(void *pipeline)
+{
+ struct app_pipeline_fc *p = pipeline;
+ uint32_t i;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ for (i = 0; i < N_BUCKETS; i++)
+ while (!TAILQ_EMPTY(&p->flows[i])) {
+ struct app_pipeline_fc_flow *flow;
+
+ flow = TAILQ_FIRST(&p->flows[i]);
+ TAILQ_REMOVE(&p->flows[i], flow, node);
+ rte_free(flow);
+ }
+
+ rte_free(p);
+ return 0;
+}
+
+static int
+app_pipeline_fc_key_check(struct pipeline_fc_key *key)
+{
+ switch (key->type) {
+ case FLOW_KEY_QINQ:
+ {
+ uint16_t svlan = key->key.qinq.svlan;
+ uint16_t cvlan = key->key.qinq.cvlan;
+
+ if ((svlan & 0xF000) ||
+ (cvlan & 0xF000))
+ return -1;
+
+ return 0;
+ }
+
+ case FLOW_KEY_IPV4_5TUPLE:
+ return 0;
+
+ case FLOW_KEY_IPV6_5TUPLE:
+ return 0;
+
+ default:
+ return -1;
+ }
+}
+
+int
+app_pipeline_fc_add(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key,
+ uint32_t port_id,
+ uint32_t flow_id)
+{
+ struct app_pipeline_fc *p;
+ struct app_pipeline_fc_flow *flow;
+
+ struct pipeline_fc_add_msg_req *req;
+ struct pipeline_fc_add_msg_rsp *rsp;
+
+ uint32_t signature;
+ int new_flow;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ if (app_pipeline_fc_key_check(key) != 0)
+ return -1;
+
+ /* Find existing flow or allocate new flow */
+ flow = app_pipeline_fc_flow_find(p, key);
+ new_flow = (flow == NULL);
+ if (flow == NULL) {
+ flow = rte_malloc(NULL, sizeof(*flow), RTE_CACHE_LINE_SIZE);
+
+ if (flow == NULL)
+ return -1;
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD;
+ app_pipeline_fc_key_convert(key, req->key, &signature);
+ req->port_id = port_id;
+ req->flow_id = flow_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ if (new_flow)
+ rte_free(flow);
+ return -1;
+ }
+
+ /* Read response and write flow */
+ if (rsp->status ||
+ (rsp->entry_ptr == NULL) ||
+ ((new_flow == 0) && (rsp->key_found == 0)) ||
+ ((new_flow == 1) && (rsp->key_found == 1))) {
+ app_msg_free(app, rsp);
+ if (new_flow)
+ rte_free(flow);
+ return -1;
+ }
+
+ memset(&flow->key, 0, sizeof(flow->key));
+ memcpy(&flow->key, key, sizeof(flow->key));
+ flow->port_id = port_id;
+ flow->flow_id = flow_id;
+ flow->signature = signature;
+ flow->entry_ptr = rsp->entry_ptr;
+
+ /* Commit rule */
+ if (new_flow) {
+ uint32_t bucket_id = signature & (N_BUCKETS - 1);
+
+ TAILQ_INSERT_TAIL(&p->flows[bucket_id], flow, node);
+ p->n_flows++;
+ }
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_fc_add_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key,
+ uint32_t *port_id,
+ uint32_t *flow_id,
+ uint32_t n_keys)
+{
+ struct app_pipeline_fc *p;
+ struct pipeline_fc_add_bulk_msg_req *req;
+ struct pipeline_fc_add_bulk_msg_rsp *rsp;
+
+ struct app_pipeline_fc_flow **flow;
+ uint32_t *signature;
+ int *new_flow;
+ struct pipeline_fc_add_bulk_flow_req *flow_req;
+ struct pipeline_fc_add_bulk_flow_rsp *flow_rsp;
+
+ uint32_t i;
+ int status;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL) ||
+ (port_id == NULL) ||
+ (flow_id == NULL) ||
+ (n_keys == 0))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ for (i = 0; i < n_keys; i++)
+ if (port_id[i] >= p->n_ports_out)
+ return -1;
+
+ for (i = 0; i < n_keys; i++)
+ if (app_pipeline_fc_key_check(&key[i]) != 0)
+ return -1;
+
+ /* Memory allocation */
+ flow = rte_malloc(NULL,
+ n_keys * sizeof(struct app_pipeline_fc_flow *),
+ RTE_CACHE_LINE_SIZE);
+ if (flow == NULL)
+ return -1;
+
+ signature = rte_malloc(NULL,
+ n_keys * sizeof(uint32_t),
+ RTE_CACHE_LINE_SIZE);
+ if (signature == NULL) {
+ rte_free(flow);
+ return -1;
+ }
+
+ new_flow = rte_malloc(
+ NULL,
+ n_keys * sizeof(int),
+ RTE_CACHE_LINE_SIZE);
+ if (new_flow == NULL) {
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+
+ flow_req = rte_malloc(NULL,
+ n_keys * sizeof(struct pipeline_fc_add_bulk_flow_req),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_req == NULL) {
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+
+ flow_rsp = rte_malloc(NULL,
+ n_keys * sizeof(struct pipeline_fc_add_bulk_flow_rsp),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_rsp == NULL) {
+ rte_free(flow_req);
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+
+ /* Find existing flow or allocate new flow */
+ for (i = 0; i < n_keys; i++) {
+ flow[i] = app_pipeline_fc_flow_find(p, &key[i]);
+ new_flow[i] = (flow[i] == NULL);
+ if (flow[i] == NULL) {
+ flow[i] = rte_zmalloc(NULL,
+ sizeof(struct app_pipeline_fc_flow),
+ RTE_CACHE_LINE_SIZE);
+
+ if (flow[i] == NULL) {
+ uint32_t j;
+
+ for (j = 0; j < i; j++)
+ if (new_flow[j])
+ rte_free(flow[j]);
+
+ rte_free(flow_rsp);
+ rte_free(flow_req);
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+ }
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ for (i = 0; i < n_keys; i++)
+ if (new_flow[i])
+ rte_free(flow[i]);
+
+ rte_free(flow_rsp);
+ rte_free(flow_req);
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+
+ for (i = 0; i < n_keys; i++) {
+ app_pipeline_fc_key_convert(&key[i],
+ flow_req[i].key,
+ &signature[i]);
+ flow_req[i].port_id = port_id[i];
+ flow_req[i].flow_id = flow_id[i];
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK;
+ req->req = flow_req;
+ req->rsp = flow_rsp;
+ req->n_keys = n_keys;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, 10000);
+ if (rsp == NULL) {
+ for (i = 0; i < n_keys; i++)
+ if (new_flow[i])
+ rte_free(flow[i]);
+
+ rte_free(flow_rsp);
+ rte_free(flow_req);
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+ return -1;
+ }
+
+ /* Read response */
+ status = 0;
+
+ for (i = 0; i < rsp->n_keys; i++)
+ if ((flow_rsp[i].entry_ptr == NULL) ||
+ ((new_flow[i] == 0) && (flow_rsp[i].key_found == 0)) ||
+ ((new_flow[i] == 1) && (flow_rsp[i].key_found == 1)))
+ status = -1;
+
+ if (rsp->n_keys < n_keys)
+ status = -1;
+
+ /* Commit flows */
+ for (i = 0; i < rsp->n_keys; i++) {
+ memcpy(&flow[i]->key, &key[i], sizeof(flow[i]->key));
+ flow[i]->port_id = port_id[i];
+ flow[i]->flow_id = flow_id[i];
+ flow[i]->signature = signature[i];
+ flow[i]->entry_ptr = flow_rsp[i].entry_ptr;
+
+ if (new_flow[i]) {
+ uint32_t bucket_id = signature[i] & (N_BUCKETS - 1);
+
+ TAILQ_INSERT_TAIL(&p->flows[bucket_id], flow[i], node);
+ p->n_flows++;
+ }
+ }
+
+ /* Free resources */
+ app_msg_free(app, rsp);
+
+ for (i = rsp->n_keys; i < n_keys; i++)
+ if (new_flow[i])
+ rte_free(flow[i]);
+
+ rte_free(flow_rsp);
+ rte_free(flow_req);
+ rte_free(new_flow);
+ rte_free(signature);
+ rte_free(flow);
+
+ return status;
+}
+
+int
+app_pipeline_fc_del(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key)
+{
+ struct app_pipeline_fc *p;
+ struct app_pipeline_fc_flow *flow;
+
+ struct pipeline_fc_del_msg_req *req;
+ struct pipeline_fc_del_msg_rsp *rsp;
+
+ uint32_t signature, bucket_id;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ if (app_pipeline_fc_key_check(key) != 0)
+ return -1;
+
+ /* Find rule */
+ flow = app_pipeline_fc_flow_find(p, key);
+ if (flow == NULL)
+ return 0;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FC_MSG_REQ_FLOW_DEL;
+ app_pipeline_fc_key_convert(key, req->key, &signature);
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status || !rsp->key_found) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Remove rule */
+ bucket_id = signature & (N_BUCKETS - 1);
+ TAILQ_REMOVE(&p->flows[bucket_id], flow, node);
+ p->n_flows--;
+ rte_free(flow);
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_fc_add_default(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct app_pipeline_fc *p;
+
+ struct pipeline_fc_add_default_msg_req *req;
+ struct pipeline_fc_add_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write flow */
+ if (rsp->status || (rsp->entry_ptr == NULL)) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ p->default_flow_port_id = port_id;
+ p->default_flow_entry_ptr = rsp->entry_ptr;
+
+	/* Commit default flow */
+ p->default_flow_present = 1;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_fc_del_default(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_fc *p;
+
+ struct pipeline_fc_del_default_msg_req *req;
+ struct pipeline_fc_del_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+	/* Commit default flow removal */
+ p->default_flow_present = 0;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+/*
+ * Flow ls
+ */
+
+static void
+print_fc_qinq_flow(struct app_pipeline_fc_flow *flow)
+{
+ printf("(SVLAN = %" PRIu32 ", "
+ "CVLAN = %" PRIu32 ") => "
+ "Port = %" PRIu32 ", "
+ "Flow ID = %" PRIu32 ", "
+ "(signature = 0x%08" PRIx32 ", "
+ "entry_ptr = %p)\n",
+
+ flow->key.key.qinq.svlan,
+ flow->key.key.qinq.cvlan,
+ flow->port_id,
+ flow->flow_id,
+ flow->signature,
+ flow->entry_ptr);
+}
+
+static void
+print_fc_ipv4_5tuple_flow(struct app_pipeline_fc_flow *flow)
+{
+ printf("(SA = %" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32 ", "
+ "DA = %" PRIu32 ".%" PRIu32 ".%" PRIu32 ".%" PRIu32 ", "
+ "SP = %" PRIu32 ", "
+ "DP = %" PRIu32 ", "
+ "Proto = %" PRIu32 ") => "
+ "Port = %" PRIu32 ", "
+ "Flow ID = %" PRIu32 " "
+ "(signature = 0x%08" PRIx32 ", "
+ "entry_ptr = %p)\n",
+
+ (flow->key.key.ipv4_5tuple.ip_src >> 24) & 0xFF,
+ (flow->key.key.ipv4_5tuple.ip_src >> 16) & 0xFF,
+ (flow->key.key.ipv4_5tuple.ip_src >> 8) & 0xFF,
+ flow->key.key.ipv4_5tuple.ip_src & 0xFF,
+
+ (flow->key.key.ipv4_5tuple.ip_dst >> 24) & 0xFF,
+ (flow->key.key.ipv4_5tuple.ip_dst >> 16) & 0xFF,
+ (flow->key.key.ipv4_5tuple.ip_dst >> 8) & 0xFF,
+ flow->key.key.ipv4_5tuple.ip_dst & 0xFF,
+
+ flow->key.key.ipv4_5tuple.port_src,
+ flow->key.key.ipv4_5tuple.port_dst,
+
+ flow->key.key.ipv4_5tuple.proto,
+
+ flow->port_id,
+ flow->flow_id,
+ flow->signature,
+ flow->entry_ptr);
+}
+
+static void
+print_fc_ipv6_5tuple_flow(struct app_pipeline_fc_flow *flow)
+{
+ printf("(SA = %02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 ", "
+ "DA = %02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32
+ ":%02" PRIx32 "%02" PRIx32 ":%02" PRIx32 "%02" PRIx32 ", "
+ "SP = %" PRIu32 ", "
+ "DP = %" PRIu32 " "
+ "Proto = %" PRIu32 " "
+ "=> Port = %" PRIu32 ", "
+ "Flow ID = %" PRIu32 " "
+ "(signature = 0x%08" PRIx32 ", "
+ "entry_ptr = %p)\n",
+
+ flow->key.key.ipv6_5tuple.ip_src[0],
+ flow->key.key.ipv6_5tuple.ip_src[1],
+ flow->key.key.ipv6_5tuple.ip_src[2],
+ flow->key.key.ipv6_5tuple.ip_src[3],
+ flow->key.key.ipv6_5tuple.ip_src[4],
+ flow->key.key.ipv6_5tuple.ip_src[5],
+ flow->key.key.ipv6_5tuple.ip_src[6],
+ flow->key.key.ipv6_5tuple.ip_src[7],
+ flow->key.key.ipv6_5tuple.ip_src[8],
+ flow->key.key.ipv6_5tuple.ip_src[9],
+ flow->key.key.ipv6_5tuple.ip_src[10],
+ flow->key.key.ipv6_5tuple.ip_src[11],
+ flow->key.key.ipv6_5tuple.ip_src[12],
+ flow->key.key.ipv6_5tuple.ip_src[13],
+ flow->key.key.ipv6_5tuple.ip_src[14],
+ flow->key.key.ipv6_5tuple.ip_src[15],
+
+ flow->key.key.ipv6_5tuple.ip_dst[0],
+ flow->key.key.ipv6_5tuple.ip_dst[1],
+ flow->key.key.ipv6_5tuple.ip_dst[2],
+ flow->key.key.ipv6_5tuple.ip_dst[3],
+ flow->key.key.ipv6_5tuple.ip_dst[4],
+ flow->key.key.ipv6_5tuple.ip_dst[5],
+ flow->key.key.ipv6_5tuple.ip_dst[6],
+ flow->key.key.ipv6_5tuple.ip_dst[7],
+ flow->key.key.ipv6_5tuple.ip_dst[8],
+ flow->key.key.ipv6_5tuple.ip_dst[9],
+ flow->key.key.ipv6_5tuple.ip_dst[10],
+ flow->key.key.ipv6_5tuple.ip_dst[11],
+ flow->key.key.ipv6_5tuple.ip_dst[12],
+ flow->key.key.ipv6_5tuple.ip_dst[13],
+ flow->key.key.ipv6_5tuple.ip_dst[14],
+ flow->key.key.ipv6_5tuple.ip_dst[15],
+
+ flow->key.key.ipv6_5tuple.port_src,
+ flow->key.key.ipv6_5tuple.port_dst,
+
+ flow->key.key.ipv6_5tuple.proto,
+
+ flow->port_id,
+ flow->flow_id,
+ flow->signature,
+ flow->entry_ptr);
+}
+
+static void
+print_fc_flow(struct app_pipeline_fc_flow *flow)
+{
+ switch (flow->key.type) {
+ case FLOW_KEY_QINQ:
+ print_fc_qinq_flow(flow);
+ break;
+
+ case FLOW_KEY_IPV4_5TUPLE:
+ print_fc_ipv4_5tuple_flow(flow);
+ break;
+
+ case FLOW_KEY_IPV6_5TUPLE:
+ print_fc_ipv6_5tuple_flow(flow);
+ break;
+ }
+}
+
+static int
+app_pipeline_fc_ls(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct app_pipeline_fc *p;
+ struct app_pipeline_fc_flow *flow;
+ uint32_t i;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_flow_classification);
+ if (p == NULL)
+ return -1;
+
+ for (i = 0; i < N_BUCKETS; i++)
+ TAILQ_FOREACH(flow, &p->flows[i], node)
+ print_fc_flow(flow);
+
+ if (p->default_flow_present)
+ printf("Default flow: port %" PRIu32 " (entry ptr = %p)\n",
+ p->default_flow_port_id,
+ p->default_flow_entry_ptr);
+ else
+ printf("Default: DROP\n");
+
+ return 0;
+}
+
+/*
+ * flow add qinq
+ */
+
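+/* Syntax: p <pipeline_id> flow add qinq <svlan> <cvlan>
+ * port <port_id> flowid <flow_id> */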
+struct cmd_fc_add_qinq_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t qinq_string;
+ uint16_t svlan;
+ uint16_t cvlan;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t flowid_string;
+ uint32_t flow_id;
+};
+
+static void
+cmd_fc_add_qinq_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_qinq_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_QINQ;
+ key.key.qinq.svlan = params->svlan;
+ key.key.qinq.cvlan = params->cvlan;
+
+ status = app_pipeline_fc_add(app,
+ params->pipeline_id,
+ &key,
+ params->port,
+ params->flow_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, flow_string,
+ "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, add_string,
+ "add");
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_qinq_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, qinq_string,
+ "qinq");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_svlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, svlan, UINT16);
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_cvlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, cvlan, UINT16);
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, port_string,
+ "port");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, port, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_flowid_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_result, flowid_string,
+ "flowid");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_result, flow_id, UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_qinq = {
+ .f = cmd_fc_add_qinq_parsed,
+ .data = NULL,
+ .help_str = "Flow add (Q-in-Q)",
+ .tokens = {
+ (void *) &cmd_fc_add_qinq_p_string,
+ (void *) &cmd_fc_add_qinq_pipeline_id,
+ (void *) &cmd_fc_add_qinq_flow_string,
+ (void *) &cmd_fc_add_qinq_add_string,
+ (void *) &cmd_fc_add_qinq_qinq_string,
+ (void *) &cmd_fc_add_qinq_svlan,
+ (void *) &cmd_fc_add_qinq_cvlan,
+ (void *) &cmd_fc_add_qinq_port_string,
+ (void *) &cmd_fc_add_qinq_port,
+ (void *) &cmd_fc_add_qinq_flowid_string,
+ (void *) &cmd_fc_add_qinq_flow_id,
+ NULL,
+ },
+};
+
+/*
+ * flow add qinq all
+ */
+
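+/* Syntax: p <pipeline_id> flow add qinq all <n_flows> <n_ports> */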
+struct cmd_fc_add_qinq_all_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t qinq_string;
+ cmdline_fixed_string_t all_string;
+ uint32_t n_flows;
+ uint32_t n_ports;
+};
+
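+/* Flows are pushed to the backend in bursts of N_FLOWS_BULK; the value must
+ * be a power of two because burst positions are computed as
+ * (id & (N_FLOWS_BULK - 1)). */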
+#ifndef N_FLOWS_BULK
+#define N_FLOWS_BULK 4096
+#endif
+
+static void
+cmd_fc_add_qinq_all_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_qinq_all_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key *key;
+ uint32_t *port_id;
+ uint32_t *flow_id;
+ uint32_t id;
+
+ /* Check input arguments */
+ if (params->n_flows == 0) {
+ printf("Invalid number of flows\n");
+ return;
+ }
+
+ if (params->n_ports == 0) {
+ printf("Invalid number of output ports\n");
+ return;
+ }
+
+ /* Memory allocation */
+ key = rte_zmalloc(NULL,
+ N_FLOWS_BULK * sizeof(*key),
+ RTE_CACHE_LINE_SIZE);
+ if (key == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ port_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*port_id),
+ RTE_CACHE_LINE_SIZE);
+ if (port_id == NULL) {
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*flow_id),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ rte_free(port_id);
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ /* Flow add */
+ for (id = 0; id < params->n_flows; id++) {
+ uint32_t pos = id & (N_FLOWS_BULK - 1);
+
+ key[pos].type = FLOW_KEY_QINQ;
+ key[pos].key.qinq.svlan = id >> 12;
+ key[pos].key.qinq.cvlan = id & 0xFFF;
+
+ port_id[pos] = id % params->n_ports;
+ flow_id[pos] = id;
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (id == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fc_add_bulk(app,
+ params->pipeline_id,
+ key,
+ port_id,
+ flow_id,
+ pos + 1);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ /* Memory free */
+ rte_free(flow_id);
+ rte_free(port_id);
+ rte_free(key);
+}
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_all_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_all_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_all_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, flow_string,
+ "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_all_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, add_string,
+ "add");
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_all_qinq_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, qinq_string,
+ "qinq");
+
+cmdline_parse_token_string_t cmd_fc_add_qinq_all_all_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_qinq_all_result, all_string,
+ "all");
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_all_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, n_flows,
+ UINT32);
+
+cmdline_parse_token_num_t cmd_fc_add_qinq_all_n_ports =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_qinq_all_result, n_ports,
+ UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_qinq_all = {
+ .f = cmd_fc_add_qinq_all_parsed,
+ .data = NULL,
+ .help_str = "Flow add all (Q-in-Q)",
+ .tokens = {
+ (void *) &cmd_fc_add_qinq_all_p_string,
+ (void *) &cmd_fc_add_qinq_all_pipeline_id,
+ (void *) &cmd_fc_add_qinq_all_flow_string,
+ (void *) &cmd_fc_add_qinq_all_add_string,
+ (void *) &cmd_fc_add_qinq_all_qinq_string,
+ (void *) &cmd_fc_add_qinq_all_all_string,
+ (void *) &cmd_fc_add_qinq_all_n_flows,
+ (void *) &cmd_fc_add_qinq_all_n_ports,
+ NULL,
+ },
+};
+
+/*
+ * flow add ipv4_5tuple
+ */
+
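+/* Syntax: p <pipeline_id> flow add ipv4_5tuple <ip_src> <ip_dst> <port_src>
+ * <port_dst> <proto> port <port_id> flowid <flow_id> */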
+struct cmd_fc_add_ipv4_5tuple_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t ipv4_5tuple_string;
+ cmdline_ipaddr_t ip_src;
+ cmdline_ipaddr_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t flowid_string;
+ uint32_t flow_id;
+};
+
+static void
+cmd_fc_add_ipv4_5tuple_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_ipv4_5tuple_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_IPV4_5TUPLE;
+ key.key.ipv4_5tuple.ip_src = rte_bswap32(
+ params->ip_src.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.ip_dst = rte_bswap32(
+ params->ip_dst.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.port_src = params->port_src;
+ key.key.ipv4_5tuple.port_dst = params->port_dst;
+ key.key.ipv4_5tuple.proto = params->proto;
+
+ status = app_pipeline_fc_add(app,
+ params->pipeline_id,
+ &key,
+ params->port,
+ params->flow_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_ipv4_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result,
+ ipv4_5tuple_string, "ipv4_5tuple");
+
+cmdline_parse_token_ipaddr_t cmd_fc_add_ipv4_5tuple_ip_src =
+ TOKEN_IPV4_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, ip_src);
+
+cmdline_parse_token_ipaddr_t cmd_fc_add_ipv4_5tuple_ip_dst =
+ TOKEN_IPV4_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, ip_dst);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port_src =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_src,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port_dst =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_dst,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, proto,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port_string,
+ "port");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, port,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_flowid_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result,
+ flowid_string, "flowid");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_result, flow_id,
+ UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_ipv4_5tuple = {
+ .f = cmd_fc_add_ipv4_5tuple_parsed,
+ .data = NULL,
+ .help_str = "Flow add (IPv4 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_add_ipv4_5tuple_p_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_pipeline_id,
+ (void *) &cmd_fc_add_ipv4_5tuple_flow_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_add_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_ipv4_5tuple_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_ip_src,
+ (void *) &cmd_fc_add_ipv4_5tuple_ip_dst,
+ (void *) &cmd_fc_add_ipv4_5tuple_port_src,
+ (void *) &cmd_fc_add_ipv4_5tuple_port_dst,
+ (void *) &cmd_fc_add_ipv4_5tuple_proto,
+ (void *) &cmd_fc_add_ipv4_5tuple_port_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_port,
+ (void *) &cmd_fc_add_ipv4_5tuple_flowid_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_flow_id,
+ NULL,
+ },
+};
+
+/*
+ * flow add ipv4_5tuple all
+ */
+
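+/* Syntax: p <pipeline_id> flow add ipv4_5tuple all <n_flows> <n_ports> */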
+struct cmd_fc_add_ipv4_5tuple_all_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t ipv4_5tuple_string;
+ cmdline_fixed_string_t all_string;
+ uint32_t n_flows;
+ uint32_t n_ports;
+};
+
+static void
+cmd_fc_add_ipv4_5tuple_all_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_ipv4_5tuple_all_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key *key;
+ uint32_t *port_id;
+ uint32_t *flow_id;
+ uint32_t id;
+
+ /* Check input parameters */
+ if (params->n_flows == 0) {
+ printf("Invalid number of flows\n");
+ return;
+ }
+
+ if (params->n_ports == 0) {
+ printf("Invalid number of ports\n");
+ return;
+ }
+
+ /* Memory allocation */
+ key = rte_zmalloc(NULL,
+ N_FLOWS_BULK * sizeof(*key),
+ RTE_CACHE_LINE_SIZE);
+ if (key == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ port_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*port_id),
+ RTE_CACHE_LINE_SIZE);
+ if (port_id == NULL) {
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*flow_id),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ rte_free(port_id);
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ /* Flow add */
+ for (id = 0; id < params->n_flows; id++) {
+ uint32_t pos = id & (N_FLOWS_BULK - 1);
+
+ key[pos].type = FLOW_KEY_IPV4_5TUPLE;
+ key[pos].key.ipv4_5tuple.ip_src = 0;
+ key[pos].key.ipv4_5tuple.ip_dst = id;
+ key[pos].key.ipv4_5tuple.port_src = 0;
+ key[pos].key.ipv4_5tuple.port_dst = 0;
+ key[pos].key.ipv4_5tuple.proto = 6;
+
+ port_id[pos] = id % params->n_ports;
+ flow_id[pos] = id;
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (id == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fc_add_bulk(app,
+ params->pipeline_id,
+ key,
+ port_id,
+ flow_id,
+ pos + 1);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ /* Memory free */
+ rte_free(flow_id);
+ rte_free(port_id);
+ rte_free(key);
+}
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_ipv4_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ ipv4_5tuple_string, "ipv4_5tuple");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv4_5tuple_all_all_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ all_string, "all");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ n_flows, UINT32);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv4_5tuple_all_n_ports =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv4_5tuple_all_result,
+ n_ports, UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_ipv4_5tuple_all = {
+ .f = cmd_fc_add_ipv4_5tuple_all_parsed,
+ .data = NULL,
+ .help_str = "Flow add all (IPv4 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_add_ipv4_5tuple_all_p_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_pipeline_id,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_flow_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_add_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_ipv4_5tuple_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_all_string,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_n_flows,
+ (void *) &cmd_fc_add_ipv4_5tuple_all_n_ports,
+ NULL,
+ },
+};
+
+/*
+ * flow add ipv6_5tuple
+ */
+
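+/* Syntax: p <pipeline_id> flow add ipv6_5tuple <ip_src> <ip_dst> <port_src>
+ * <port_dst> <proto> port <port_id> flowid <flow_id> */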
+struct cmd_fc_add_ipv6_5tuple_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t ipv6_5tuple_string;
+ cmdline_ipaddr_t ip_src;
+ cmdline_ipaddr_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t flowid_string;
+ uint32_t flow_id;
+};
+
+static void
+cmd_fc_add_ipv6_5tuple_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_ipv6_5tuple_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_IPV6_5TUPLE;
+ memcpy(key.key.ipv6_5tuple.ip_src,
+ params->ip_src.addr.ipv6.s6_addr,
+ 16);
+ memcpy(key.key.ipv6_5tuple.ip_dst,
+ params->ip_dst.addr.ipv6.s6_addr,
+ 16);
+ key.key.ipv6_5tuple.port_src = params->port_src;
+ key.key.ipv6_5tuple.port_dst = params->port_dst;
+ key.key.ipv6_5tuple.proto = params->proto;
+
+ status = app_pipeline_fc_add(app,
+ params->pipeline_id,
+ &key,
+ params->port,
+ params->flow_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_ipv6_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ ipv6_5tuple_string, "ipv6_5tuple");
+
+cmdline_parse_token_ipaddr_t cmd_fc_add_ipv6_5tuple_ip_src =
+ TOKEN_IPV6_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, ip_src);
+
+cmdline_parse_token_ipaddr_t cmd_fc_add_ipv6_5tuple_ip_dst =
+ TOKEN_IPV6_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, ip_dst);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port_src =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port_src,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port_dst =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port_dst,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, proto,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ port_string, "port");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, port,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_flowid_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result,
+ flowid_string, "flowid");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_flow_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_result, flow_id,
+ UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_ipv6_5tuple = {
+ .f = cmd_fc_add_ipv6_5tuple_parsed,
+ .data = NULL,
+ .help_str = "Flow add (IPv6 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_add_ipv6_5tuple_p_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_pipeline_id,
+ (void *) &cmd_fc_add_ipv6_5tuple_flow_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_add_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_ipv6_5tuple_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_ip_src,
+ (void *) &cmd_fc_add_ipv6_5tuple_ip_dst,
+ (void *) &cmd_fc_add_ipv6_5tuple_port_src,
+ (void *) &cmd_fc_add_ipv6_5tuple_port_dst,
+ (void *) &cmd_fc_add_ipv6_5tuple_proto,
+ (void *) &cmd_fc_add_ipv6_5tuple_port_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_port,
+ (void *) &cmd_fc_add_ipv6_5tuple_flowid_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_flow_id,
+ NULL,
+ },
+};
+
+/*
+ * flow add ipv6_5tuple all
+ */
+
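+/* Syntax: p <pipeline_id> flow add ipv6_5tuple all <n_flows> <n_ports> */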
+struct cmd_fc_add_ipv6_5tuple_all_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t ipv6_5tuple_string;
+ cmdline_fixed_string_t all_string;
+ uint32_t n_flows;
+ uint32_t n_ports;
+};
+
+static void
+cmd_fc_add_ipv6_5tuple_all_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_ipv6_5tuple_all_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key *key;
+ uint32_t *port_id;
+ uint32_t *flow_id;
+ uint32_t id;
+
+ /* Check input parameters */
+ if (params->n_flows == 0) {
+ printf("Invalid number of flows\n");
+ return;
+ }
+
+ if (params->n_ports == 0) {
+ printf("Invalid number of ports\n");
+ return;
+ }
+
+ /* Memory allocation */
+ key = rte_zmalloc(NULL,
+ N_FLOWS_BULK * sizeof(*key),
+ RTE_CACHE_LINE_SIZE);
+ if (key == NULL) {
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ port_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*port_id),
+ RTE_CACHE_LINE_SIZE);
+ if (port_id == NULL) {
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ flow_id = rte_malloc(NULL,
+ N_FLOWS_BULK * sizeof(*flow_id),
+ RTE_CACHE_LINE_SIZE);
+ if (flow_id == NULL) {
+ rte_free(port_id);
+ rte_free(key);
+ printf("Memory allocation failed\n");
+ return;
+ }
+
+ /* Flow add */
+ for (id = 0; id < params->n_flows; id++) {
+ uint32_t pos = id & (N_FLOWS_BULK - 1);
+ uint32_t *x;
+
+ key[pos].type = FLOW_KEY_IPV6_5TUPLE;
+ x = (uint32_t *) key[pos].key.ipv6_5tuple.ip_dst;
+ *x = rte_bswap32(id);
+ key[pos].key.ipv6_5tuple.proto = 6;
+
+ port_id[pos] = id % params->n_ports;
+ flow_id[pos] = id;
+
+ if ((pos == N_FLOWS_BULK - 1) ||
+ (id == params->n_flows - 1)) {
+ int status;
+
+ status = app_pipeline_fc_add_bulk(app,
+ params->pipeline_id,
+ key,
+ port_id,
+ flow_id,
+ pos + 1);
+
+ if (status != 0) {
+ printf("Command failed\n");
+
+ break;
+ }
+ }
+ }
+
+ /* Memory free */
+ rte_free(flow_id);
+ rte_free(port_id);
+ rte_free(key);
+}
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_ipv6_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ ipv6_5tuple_string, "ipv6_5tuple");
+
+cmdline_parse_token_string_t cmd_fc_add_ipv6_5tuple_all_all_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ all_string, "all");
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_n_flows =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ n_flows, UINT32);
+
+cmdline_parse_token_num_t cmd_fc_add_ipv6_5tuple_all_n_ports =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_ipv6_5tuple_all_result,
+ n_ports, UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_ipv6_5tuple_all = {
+ .f = cmd_fc_add_ipv6_5tuple_all_parsed,
+ .data = NULL,
+	.help_str = "Flow add all (IPv6 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_add_ipv6_5tuple_all_p_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_pipeline_id,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_flow_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_add_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_ipv6_5tuple_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_all_string,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_n_flows,
+ (void *) &cmd_fc_add_ipv6_5tuple_all_n_ports,
+ NULL,
+ },
+};
+
+/*
+ * flow del qinq
+ */
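+/* Syntax: p <pipeline_id> flow del qinq <svlan> <cvlan> */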
+struct cmd_fc_del_qinq_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t qinq_string;
+ uint16_t svlan;
+ uint16_t cvlan;
+};
+
+static void
+cmd_fc_del_qinq_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_del_qinq_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_QINQ;
+ key.key.qinq.svlan = params->svlan;
+ key.key.qinq.cvlan = params->cvlan;
+ status = app_pipeline_fc_del(app, params->pipeline_id, &key);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_del_qinq_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_del_qinq_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_del_qinq_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, flow_string,
+ "flow");
+
+cmdline_parse_token_string_t cmd_fc_del_qinq_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, del_string,
+ "del");
+
+cmdline_parse_token_string_t cmd_fc_del_qinq_qinq_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_qinq_result, qinq_string,
+ "qinq");
+
+cmdline_parse_token_num_t cmd_fc_del_qinq_svlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, svlan, UINT16);
+
+cmdline_parse_token_num_t cmd_fc_del_qinq_cvlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_qinq_result, cvlan, UINT16);
+
+cmdline_parse_inst_t cmd_fc_del_qinq = {
+ .f = cmd_fc_del_qinq_parsed,
+ .data = NULL,
+ .help_str = "Flow delete (Q-in-Q)",
+ .tokens = {
+ (void *) &cmd_fc_del_qinq_p_string,
+ (void *) &cmd_fc_del_qinq_pipeline_id,
+ (void *) &cmd_fc_del_qinq_flow_string,
+ (void *) &cmd_fc_del_qinq_del_string,
+ (void *) &cmd_fc_del_qinq_qinq_string,
+ (void *) &cmd_fc_del_qinq_svlan,
+ (void *) &cmd_fc_del_qinq_cvlan,
+ NULL,
+ },
+};
+
+/*
+ * flow del ipv4_5tuple
+ */
+
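+/* Syntax: p <pipeline_id> flow del ipv4_5tuple <ip_src> <ip_dst> <port_src>
+ * <port_dst> <proto> */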
+struct cmd_fc_del_ipv4_5tuple_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t ipv4_5tuple_string;
+ cmdline_ipaddr_t ip_src;
+ cmdline_ipaddr_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+};
+
+static void
+cmd_fc_del_ipv4_5tuple_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_del_ipv4_5tuple_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_IPV4_5TUPLE;
+ key.key.ipv4_5tuple.ip_src = rte_bswap32(
+ params->ip_src.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.ip_dst = rte_bswap32(
+ params->ip_dst.addr.ipv4.s_addr);
+ key.key.ipv4_5tuple.port_src = params->port_src;
+ key.key.ipv4_5tuple.port_dst = params->port_dst;
+ key.key.ipv4_5tuple.proto = params->proto;
+
+ status = app_pipeline_fc_del(app, params->pipeline_id, &key);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ del_string, "del");
+
+cmdline_parse_token_string_t cmd_fc_del_ipv4_5tuple_ipv4_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ ipv4_5tuple_string, "ipv4_5tuple");
+
+cmdline_parse_token_ipaddr_t cmd_fc_del_ipv4_5tuple_ip_src =
+ TOKEN_IPV4_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ ip_src);
+
+cmdline_parse_token_ipaddr_t cmd_fc_del_ipv4_5tuple_ip_dst =
+ TOKEN_IPV4_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result, ip_dst);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_port_src =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ port_src, UINT16);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_port_dst =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ port_dst, UINT16);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv4_5tuple_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv4_5tuple_result,
+ proto, UINT32);
+
+cmdline_parse_inst_t cmd_fc_del_ipv4_5tuple = {
+ .f = cmd_fc_del_ipv4_5tuple_parsed,
+ .data = NULL,
+ .help_str = "Flow delete (IPv4 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_del_ipv4_5tuple_p_string,
+ (void *) &cmd_fc_del_ipv4_5tuple_pipeline_id,
+ (void *) &cmd_fc_del_ipv4_5tuple_flow_string,
+ (void *) &cmd_fc_del_ipv4_5tuple_del_string,
+ (void *) &cmd_fc_del_ipv4_5tuple_ipv4_5tuple_string,
+ (void *) &cmd_fc_del_ipv4_5tuple_ip_src,
+ (void *) &cmd_fc_del_ipv4_5tuple_ip_dst,
+ (void *) &cmd_fc_del_ipv4_5tuple_port_src,
+ (void *) &cmd_fc_del_ipv4_5tuple_port_dst,
+ (void *) &cmd_fc_del_ipv4_5tuple_proto,
+ NULL,
+ },
+};
+
+/*
+ * flow del ipv6_5tuple
+ */
+
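+/* Syntax: p <pipeline_id> flow del ipv6_5tuple <ip_src> <ip_dst> <port_src>
+ * <port_dst> <proto> */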
+struct cmd_fc_del_ipv6_5tuple_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t ipv6_5tuple_string;
+ cmdline_ipaddr_t ip_src;
+ cmdline_ipaddr_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+};
+
+static void
+cmd_fc_del_ipv6_5tuple_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_del_ipv6_5tuple_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_fc_key key;
+ int status;
+
+ memset(&key, 0, sizeof(key));
+ key.type = FLOW_KEY_IPV6_5TUPLE;
+ memcpy(key.key.ipv6_5tuple.ip_src,
+ params->ip_src.addr.ipv6.s6_addr,
+ 16);
+ memcpy(key.key.ipv6_5tuple.ip_dst,
+ params->ip_dst.addr.ipv6.s6_addr,
+ 16);
+ key.key.ipv6_5tuple.port_src = params->port_src;
+ key.key.ipv6_5tuple.port_dst = params->port_dst;
+ key.key.ipv6_5tuple.proto = params->proto;
+
+ status = app_pipeline_fc_del(app, params->pipeline_id, &key);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result,
+ p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result,
+ pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result,
+ del_string, "del");
+
+cmdline_parse_token_string_t cmd_fc_del_ipv6_5tuple_ipv6_5tuple_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result,
+ ipv6_5tuple_string, "ipv6_5tuple");
+
+cmdline_parse_token_ipaddr_t cmd_fc_del_ipv6_5tuple_ip_src =
+ TOKEN_IPV6_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, ip_src);
+
+cmdline_parse_token_ipaddr_t cmd_fc_del_ipv6_5tuple_ip_dst =
+ TOKEN_IPV6_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, ip_dst);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_port_src =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, port_src,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_port_dst =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, port_dst,
+ UINT16);
+
+cmdline_parse_token_num_t cmd_fc_del_ipv6_5tuple_proto =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_ipv6_5tuple_result, proto,
+ UINT32);
+
+cmdline_parse_inst_t cmd_fc_del_ipv6_5tuple = {
+ .f = cmd_fc_del_ipv6_5tuple_parsed,
+ .data = NULL,
+ .help_str = "Flow delete (IPv6 5-tuple)",
+ .tokens = {
+ (void *) &cmd_fc_del_ipv6_5tuple_p_string,
+ (void *) &cmd_fc_del_ipv6_5tuple_pipeline_id,
+ (void *) &cmd_fc_del_ipv6_5tuple_flow_string,
+ (void *) &cmd_fc_del_ipv6_5tuple_del_string,
+ (void *) &cmd_fc_del_ipv6_5tuple_ipv6_5tuple_string,
+ (void *) &cmd_fc_del_ipv6_5tuple_ip_src,
+ (void *) &cmd_fc_del_ipv6_5tuple_ip_dst,
+ (void *) &cmd_fc_del_ipv6_5tuple_port_src,
+ (void *) &cmd_fc_del_ipv6_5tuple_port_dst,
+ (void *) &cmd_fc_del_ipv6_5tuple_proto,
+ NULL,
+ },
+};
+
+/*
+ * flow add default
+ */
+
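+/* Syntax: p <pipeline_id> flow add default <port_id> */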
+struct cmd_fc_add_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t default_string;
+ uint32_t port;
+};
+
+static void
+cmd_fc_add_default_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_add_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_fc_add_default(app, params->pipeline_id,
+ params->port);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_add_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_fc_add_default_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_default_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_add_default_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, flow_string,
+ "flow");
+
+cmdline_parse_token_string_t cmd_fc_add_default_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result, add_string,
+ "add");
+
+cmdline_parse_token_string_t cmd_fc_add_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_add_default_result,
+ default_string, "default");
+
+cmdline_parse_token_num_t cmd_fc_add_default_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_add_default_result, port, UINT32);
+
+cmdline_parse_inst_t cmd_fc_add_default = {
+ .f = cmd_fc_add_default_parsed,
+ .data = NULL,
+ .help_str = "Flow add default",
+ .tokens = {
+ (void *) &cmd_fc_add_default_p_string,
+ (void *) &cmd_fc_add_default_pipeline_id,
+ (void *) &cmd_fc_add_default_flow_string,
+ (void *) &cmd_fc_add_default_add_string,
+ (void *) &cmd_fc_add_default_default_string,
+ (void *) &cmd_fc_add_default_port,
+ NULL,
+ },
+};
+
+/*
+ * flow del default
+ */
+
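+/* Syntax: p <pipeline_id> flow del default */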
+struct cmd_fc_del_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t default_string;
+};
+
+static void
+cmd_fc_del_default_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_del_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_fc_del_default(app, params->pipeline_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_del_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, p_string,
+ "p");
+
+cmdline_parse_token_num_t cmd_fc_del_default_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_del_default_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_fc_del_default_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, flow_string,
+ "flow");
+
+cmdline_parse_token_string_t cmd_fc_del_default_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result, del_string,
+ "del");
+
+cmdline_parse_token_string_t cmd_fc_del_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_del_default_result,
+ default_string, "default");
+
+cmdline_parse_inst_t cmd_fc_del_default = {
+ .f = cmd_fc_del_default_parsed,
+ .data = NULL,
+ .help_str = "Flow delete default",
+ .tokens = {
+ (void *) &cmd_fc_del_default_p_string,
+ (void *) &cmd_fc_del_default_pipeline_id,
+ (void *) &cmd_fc_del_default_flow_string,
+ (void *) &cmd_fc_del_default_del_string,
+ (void *) &cmd_fc_del_default_default_string,
+ NULL,
+ },
+};
+
+/*
+ * flow ls
+ */
+
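+/* Syntax: p <pipeline_id> flow ls */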
+struct cmd_fc_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t flow_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_fc_ls_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_fc_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_fc_ls(app, params->pipeline_id);
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_fc_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result, p_string, "p");
+
+cmdline_parse_token_num_t cmd_fc_ls_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_fc_ls_result, pipeline_id, UINT32);
+
+cmdline_parse_token_string_t cmd_fc_ls_flow_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result,
+ flow_string, "flow");
+
+cmdline_parse_token_string_t cmd_fc_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_fc_ls_result, ls_string,
+ "ls");
+
+cmdline_parse_inst_t cmd_fc_ls = {
+ .f = cmd_fc_ls_parsed,
+ .data = NULL,
+ .help_str = "Flow list",
+ .tokens = {
+ (void *) &cmd_fc_ls_p_string,
+ (void *) &cmd_fc_ls_pipeline_id,
+ (void *) &cmd_fc_ls_flow_string,
+ (void *) &cmd_fc_ls_ls_string,
+ NULL,
+ },
+};
+
+static cmdline_parse_ctx_t pipeline_cmds[] = {
+ (cmdline_parse_inst_t *) &cmd_fc_add_qinq,
+ (cmdline_parse_inst_t *) &cmd_fc_add_ipv4_5tuple,
+ (cmdline_parse_inst_t *) &cmd_fc_add_ipv6_5tuple,
+
+ (cmdline_parse_inst_t *) &cmd_fc_del_qinq,
+ (cmdline_parse_inst_t *) &cmd_fc_del_ipv4_5tuple,
+ (cmdline_parse_inst_t *) &cmd_fc_del_ipv6_5tuple,
+
+ (cmdline_parse_inst_t *) &cmd_fc_add_default,
+ (cmdline_parse_inst_t *) &cmd_fc_del_default,
+
+ (cmdline_parse_inst_t *) &cmd_fc_add_qinq_all,
+ (cmdline_parse_inst_t *) &cmd_fc_add_ipv4_5tuple_all,
+ (cmdline_parse_inst_t *) &cmd_fc_add_ipv6_5tuple_all,
+
+ (cmdline_parse_inst_t *) &cmd_fc_ls,
+ NULL,
+};
+
+static struct pipeline_fe_ops pipeline_flow_classification_fe_ops = {
+ .f_init = app_pipeline_fc_init,
+ .f_free = app_pipeline_fc_free,
+ .cmds = pipeline_cmds,
+};
+
+struct pipeline_type pipeline_flow_classification = {
+ .name = "FLOW_CLASSIFICATION",
+ .be_ops = &pipeline_flow_classification_be_ops,
+ .fe_ops = &pipeline_flow_classification_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification.h b/examples/ip_pipeline/pipeline/pipeline_flow_classification.h
new file mode 100644
index 00000000..9c775006
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification.h
@@ -0,0 +1,107 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_H__
+#define __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_H__
+
+#include "pipeline.h"
+#include "pipeline_flow_classification_be.h"
+
+enum flow_key_type {
+ FLOW_KEY_QINQ,
+ FLOW_KEY_IPV4_5TUPLE,
+ FLOW_KEY_IPV6_5TUPLE,
+};
+
+struct flow_key_qinq {
+ uint16_t svlan;
+ uint16_t cvlan;
+};
+
+struct flow_key_ipv4_5tuple {
+ uint32_t ip_src;
+ uint32_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+};
+
+struct flow_key_ipv6_5tuple {
+ uint8_t ip_src[16];
+ uint8_t ip_dst[16];
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint32_t proto;
+};
+
+struct pipeline_fc_key {
+ enum flow_key_type type;
+ union {
+ struct flow_key_qinq qinq;
+ struct flow_key_ipv4_5tuple ipv4_5tuple;
+ struct flow_key_ipv6_5tuple ipv6_5tuple;
+ } key;
+};
+
+int
+app_pipeline_fc_add(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key,
+ uint32_t port_id,
+ uint32_t flow_id);
+
+int
+app_pipeline_fc_add_bulk(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key,
+ uint32_t *port_id,
+ uint32_t *flow_id,
+ uint32_t n_keys);
+
+int
+app_pipeline_fc_del(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_fc_key *key);
+
+int
+app_pipeline_fc_add_default(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_pipeline_fc_del_default(struct app_params *app,
+ uint32_t pipeline_id);
+
+extern struct pipeline_type pipeline_flow_classification;
+
+#endif
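
Note: the front-end API takes a typed key rather than raw table bytes. A minimal sketch of adding a single IPv4 5-tuple flow (pipeline_id 1, port_id 0 and flow_id 5 are illustrative values, `app` is the application context, the IPv4() helper comes from rte_ip.h, and the front end is assumed to handle any byte-order conversion when it builds the raw key):

	struct pipeline_fc_key key = {
		.type = FLOW_KEY_IPV4_5TUPLE,
		.key.ipv4_5tuple = {
			.ip_src = IPv4(10, 0, 0, 1),
			.ip_dst = IPv4(10, 0, 0, 2),
			.port_src = 1000,
			.port_dst = 2000,
			.proto = 17, /* UDP */
		},
	};

	if (app_pipeline_fc_add(app, 1, &key, 0, 5) != 0)
		printf("flow add failed\n");
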
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c
new file mode 100644
index 00000000..70d976d5
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.c
@@ -0,0 +1,811 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_table_hash.h>
+#include <rte_byteorder.h>
+#include <pipeline.h>
+
+#include "pipeline_flow_classification_be.h"
+#include "pipeline_actions_common.h"
+#include "parser.h"
+#include "hash_func.h"
+
+struct pipeline_flow_classification {
+ struct pipeline p;
+ pipeline_msg_req_handler custom_handlers[PIPELINE_FC_MSG_REQS];
+
+ uint32_t n_flows;
+ uint32_t key_size;
+ uint32_t flow_id;
+
+ uint32_t key_offset;
+ uint32_t hash_offset;
+ uint8_t key_mask[PIPELINE_FC_FLOW_KEY_MAX_SIZE];
+ uint32_t key_mask_present;
+ uint32_t flow_id_offset;
+
+} __rte_cache_aligned;
+
+static void *
+pipeline_fc_msg_req_custom_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler handlers[] = {
+ [PIPELINE_MSG_REQ_PING] =
+ pipeline_msg_req_ping_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_IN] =
+ pipeline_msg_req_stats_port_in_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_OUT] =
+ pipeline_msg_req_stats_port_out_handler,
+ [PIPELINE_MSG_REQ_STATS_TABLE] =
+ pipeline_msg_req_stats_table_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_ENABLE] =
+ pipeline_msg_req_port_in_enable_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_DISABLE] =
+ pipeline_msg_req_port_in_disable_handler,
+ [PIPELINE_MSG_REQ_CUSTOM] =
+ pipeline_fc_msg_req_custom_handler,
+};
+
+static void *
+pipeline_fc_msg_req_add_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fc_msg_req_add_bulk_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fc_msg_req_del_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fc_msg_req_add_default_handler(struct pipeline *p, void *msg);
+
+static void *
+pipeline_fc_msg_req_del_default_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler custom_handlers[] = {
+ [PIPELINE_FC_MSG_REQ_FLOW_ADD] =
+ pipeline_fc_msg_req_add_handler,
+ [PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK] =
+ pipeline_fc_msg_req_add_bulk_handler,
+ [PIPELINE_FC_MSG_REQ_FLOW_DEL] =
+ pipeline_fc_msg_req_del_handler,
+ [PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT] =
+ pipeline_fc_msg_req_add_default_handler,
+ [PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT] =
+ pipeline_fc_msg_req_del_default_handler,
+};
+
+/*
+ * Flow table
+ */
+struct flow_table_entry {
+ struct rte_pipeline_table_entry head;
+
+ uint32_t flow_id;
+ uint32_t pad;
+};
+
+static rte_table_hash_op_hash hash_func[] = {
+ hash_default_key8,
+ hash_default_key16,
+ hash_default_key24,
+ hash_default_key32,
+ hash_default_key40,
+ hash_default_key48,
+ hash_default_key56,
+ hash_default_key64
+};
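
Note: the array is indexed by (key_size / 8) - 1, so a key_size of 8 bytes selects hash_default_key8, 16 bytes selects hash_default_key16, and so on up to 64 bytes; the same expression is used for f_hash when the table is created below:

	rte_table_hash_op_hash f = hash_func[(16 / 8) - 1]; /* key_size 16 -> hash_default_key16 */
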
+
+/*
+ * Flow table AH - Write flow_id to packet meta-data
+ */
+static inline void
+pkt_work_flow_id(
+ struct rte_mbuf *pkt,
+ struct rte_pipeline_table_entry *table_entry,
+ void *arg)
+{
+ struct pipeline_flow_classification *p_fc = arg;
+ uint32_t *flow_id_ptr =
+ RTE_MBUF_METADATA_UINT32_PTR(pkt, p_fc->flow_id_offset);
+ struct flow_table_entry *entry =
+ (struct flow_table_entry *) table_entry;
+
+ /* Read */
+ uint32_t flow_id = entry->flow_id;
+
+ /* Compute */
+
+ /* Write */
+ *flow_id_ptr = flow_id;
+}
+
+static inline void
+pkt4_work_flow_id(
+ struct rte_mbuf **pkts,
+ struct rte_pipeline_table_entry **table_entries,
+ void *arg)
+{
+ struct pipeline_flow_classification *p_fc = arg;
+
+ uint32_t *flow_id_ptr0 =
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[0], p_fc->flow_id_offset);
+ uint32_t *flow_id_ptr1 =
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[1], p_fc->flow_id_offset);
+ uint32_t *flow_id_ptr2 =
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[2], p_fc->flow_id_offset);
+ uint32_t *flow_id_ptr3 =
+ RTE_MBUF_METADATA_UINT32_PTR(pkts[3], p_fc->flow_id_offset);
+
+ struct flow_table_entry *entry0 =
+ (struct flow_table_entry *) table_entries[0];
+ struct flow_table_entry *entry1 =
+ (struct flow_table_entry *) table_entries[1];
+ struct flow_table_entry *entry2 =
+ (struct flow_table_entry *) table_entries[2];
+ struct flow_table_entry *entry3 =
+ (struct flow_table_entry *) table_entries[3];
+
+ /* Read */
+ uint32_t flow_id0 = entry0->flow_id;
+ uint32_t flow_id1 = entry1->flow_id;
+ uint32_t flow_id2 = entry2->flow_id;
+ uint32_t flow_id3 = entry3->flow_id;
+
+ /* Compute */
+
+ /* Write */
+ *flow_id_ptr0 = flow_id0;
+ *flow_id_ptr1 = flow_id1;
+ *flow_id_ptr2 = flow_id2;
+ *flow_id_ptr3 = flow_id3;
+}
+
+PIPELINE_TABLE_AH_HIT(fc_table_ah_hit,
+ pkt_work_flow_id, pkt4_work_flow_id);
+
+static rte_pipeline_table_action_handler_hit
+get_fc_table_ah_hit(struct pipeline_flow_classification *p)
+{
+ if (p->flow_id)
+ return fc_table_ah_hit;
+
+ return NULL;
+}
+
+/*
+ * Argument parsing
+ */
+static int
+pipeline_fc_parse_args(struct pipeline_flow_classification *p,
+ struct pipeline_params *params)
+{
+ uint32_t n_flows_present = 0;
+ uint32_t key_offset_present = 0;
+ uint32_t key_size_present = 0;
+ uint32_t hash_offset_present = 0;
+ uint32_t key_mask_present = 0;
+ uint32_t flow_id_offset_present = 0;
+
+ uint32_t i;
+ char key_mask_str[PIPELINE_FC_FLOW_KEY_MAX_SIZE * 2];
+
+ p->hash_offset = 0;
+
+ /* default values */
+ p->flow_id = 0;
+
+ for (i = 0; i < params->n_args; i++) {
+ char *arg_name = params->args_name[i];
+ char *arg_value = params->args_value[i];
+
+ /* n_flows */
+ if (strcmp(arg_name, "n_flows") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_flows_present == 0, params->name,
+ arg_name);
+ n_flows_present = 1;
+
+ status = parser_read_uint32(&p->n_flows,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->n_flows != 0)), params->name,
+ arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* key_offset */
+ if (strcmp(arg_name, "key_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ key_offset_present == 0, params->name,
+ arg_name);
+ key_offset_present = 1;
+
+ status = parser_read_uint32(&p->key_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* key_size */
+ if (strcmp(arg_name, "key_size") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ key_size_present == 0, params->name,
+ arg_name);
+ key_size_present = 1;
+
+ status = parser_read_uint32(&p->key_size,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->key_size != 0) &&
+ (p->key_size % 8 == 0)),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) &&
+ (p->key_size <=
+ PIPELINE_FC_FLOW_KEY_MAX_SIZE)),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* key_mask */
+ if (strcmp(arg_name, "key_mask") == 0) {
+ int mask_str_len = strlen(arg_value);
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ key_mask_present == 0,
+ params->name, arg_name);
+ key_mask_present = 1;
+
+ PIPELINE_ARG_CHECK((mask_str_len <
+ (PIPELINE_FC_FLOW_KEY_MAX_SIZE * 2)),
+ "Parse error in section \"%s\": entry "
+ "\"%s\" is too long", params->name,
+ arg_name);
+
+ snprintf(key_mask_str, sizeof(key_mask_str), "%s",
+ arg_value);
+
+ continue;
+ }
+
+ /* hash_offset */
+ if (strcmp(arg_name, "hash_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ hash_offset_present == 0, params->name,
+ arg_name);
+ hash_offset_present = 1;
+
+ status = parser_read_uint32(&p->hash_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* flow_id_offset */
+ if (strcmp(arg_name, "flowid_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ flow_id_offset_present == 0, params->name,
+ arg_name);
+ flow_id_offset_present = 1;
+
+ status = parser_read_uint32(&p->flow_id_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ p->flow_id = 1;
+
+ continue;
+ }
+
+ /* Unknown argument */
+ PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name);
+ }
+
+ /* Check that mandatory arguments are present */
+ PIPELINE_PARSE_ERR_MANDATORY((n_flows_present), params->name,
+ "n_flows");
+ PIPELINE_PARSE_ERR_MANDATORY((key_offset_present), params->name,
+ "key_offset");
+ PIPELINE_PARSE_ERR_MANDATORY((key_size_present), params->name,
+ "key_size");
+
+ if (key_mask_present) {
+ uint32_t key_size = p->key_size;
+ int status;
+
+ PIPELINE_ARG_CHECK(((key_size == 8) || (key_size == 16)),
+ "Parse error in section \"%s\": entry key_mask "
+ "only allowed for key_size of 8 or 16 bytes",
+ params->name);
+
+ PIPELINE_ARG_CHECK((strlen(key_mask_str) ==
+ (key_size * 2)), "Parse error in section "
+ "\"%s\": key_mask should have exactly %u hex "
+ "digits", params->name, (key_size * 2));
+
+ PIPELINE_ARG_CHECK((hash_offset_present == 0), "Parse "
+ "error in section \"%s\": entry hash_offset only "
+ "allowed when key_mask is not present",
+ params->name);
+
+ status = parse_hex_string(key_mask_str, p->key_mask,
+ &p->key_size);
+
+ PIPELINE_PARSE_ERR_INV_VAL(((status == 0) &&
+ (key_size == p->key_size)), params->name,
+ "key_mask", key_mask_str);
+ }
+
+ p->key_mask_present = key_mask_present;
+
+ return 0;
+}
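
Note: the arguments parsed above correspond to entries of the pipeline's section in the application configuration file. A hedged sketch of the flow-classification-specific part of such a section (section name and values are illustrative; the core/pktq_in/pktq_out entries a real section also needs are omitted, and key_mask is shown with a 16-byte key, i.e. 32 hex digits):

	[PIPELINE1]
	type = FLOW_CLASSIFICATION
	n_flows = 4096
	key_size = 16
	key_offset = 64
	key_mask = 00FF0000FFFFFFFFFFFFFFFFFFFF0000
	flowid_offset = 96
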
+
+static void *pipeline_fc_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline *p;
+ struct pipeline_flow_classification *p_fc;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if (params == NULL)
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(
+ sizeof(struct pipeline_flow_classification));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+ p_fc = (struct pipeline_flow_classification *) p;
+
+ strcpy(p->name, params->name);
+ p->log_level = params->log_level;
+
+ PLOG(p, HIGH, "Flow classification");
+
+	/* Parse arguments */
+	if (pipeline_fc_parse_args(p_fc, params)) {
+		rte_free(p);
+		return NULL;
+	}
+
+ /* Pipeline */
+ {
+ struct rte_pipeline_params pipeline_params = {
+ .name = params->name,
+ .socket_id = params->socket_id,
+ .offset_port_id = 0,
+ };
+
+ p->p = rte_pipeline_create(&pipeline_params);
+ if (p->p == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Input ports */
+ p->n_ports_in = params->n_ports_in;
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_port_in_params port_params = {
+ .ops = pipeline_port_in_params_get_ops(
+ &params->port_in[i]),
+ .arg_create = pipeline_port_in_params_convert(
+ &params->port_in[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ .burst_size = params->port_in[i].burst_size,
+ };
+
+ int status = rte_pipeline_port_in_create(p->p,
+ &port_params,
+ &p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Output ports */
+ p->n_ports_out = params->n_ports_out;
+ for (i = 0; i < p->n_ports_out; i++) {
+ struct rte_pipeline_port_out_params port_params = {
+ .ops = pipeline_port_out_params_get_ops(
+ &params->port_out[i]),
+ .arg_create = pipeline_port_out_params_convert(
+ &params->port_out[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ };
+
+ int status = rte_pipeline_port_out_create(p->p,
+ &port_params,
+ &p->port_out_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Tables */
+ p->n_tables = 1;
+ {
+ struct rte_table_hash_key8_ext_params
+ table_hash_key8_params = {
+ .n_entries = p_fc->n_flows,
+ .n_entries_ext = p_fc->n_flows,
+ .signature_offset = p_fc->hash_offset,
+ .key_offset = p_fc->key_offset,
+ .f_hash = hash_func[(p_fc->key_size / 8) - 1],
+ .key_mask = (p_fc->key_mask_present) ?
+ p_fc->key_mask : NULL,
+ .seed = 0,
+ };
+
+ struct rte_table_hash_key16_ext_params
+ table_hash_key16_params = {
+ .n_entries = p_fc->n_flows,
+ .n_entries_ext = p_fc->n_flows,
+ .signature_offset = p_fc->hash_offset,
+ .key_offset = p_fc->key_offset,
+ .f_hash = hash_func[(p_fc->key_size / 8) - 1],
+ .key_mask = (p_fc->key_mask_present) ?
+ p_fc->key_mask : NULL,
+ .seed = 0,
+ };
+
+ struct rte_table_hash_ext_params
+ table_hash_params = {
+ .key_size = p_fc->key_size,
+ .n_keys = p_fc->n_flows,
+ .n_buckets = p_fc->n_flows / 4,
+ .n_buckets_ext = p_fc->n_flows / 4,
+ .f_hash = hash_func[(p_fc->key_size / 8) - 1],
+ .seed = 0,
+ .signature_offset = p_fc->hash_offset,
+ .key_offset = p_fc->key_offset,
+ };
+
+ struct rte_pipeline_table_params table_params = {
+ .ops = NULL, /* set below */
+ .arg_create = NULL, /* set below */
+ .f_action_hit = get_fc_table_ah_hit(p_fc),
+ .f_action_miss = NULL,
+ .arg_ah = p_fc,
+ .action_data_size = sizeof(struct flow_table_entry) -
+ sizeof(struct rte_pipeline_table_entry),
+ };
+
+ int status;
+
+ switch (p_fc->key_size) {
+ case 8:
+ if (p_fc->hash_offset != 0) {
+ table_params.ops =
+ &rte_table_hash_key8_ext_ops;
+ } else {
+ table_params.ops =
+ &rte_table_hash_key8_ext_dosig_ops;
+ }
+ table_params.arg_create = &table_hash_key8_params;
+ break;
+
+ case 16:
+ if (p_fc->hash_offset != 0) {
+ table_params.ops =
+ &rte_table_hash_key16_ext_ops;
+ } else {
+ table_params.ops =
+ &rte_table_hash_key16_ext_dosig_ops;
+ }
+ table_params.arg_create = &table_hash_key16_params;
+ break;
+
+ default:
+ table_params.ops = &rte_table_hash_ext_ops;
+ table_params.arg_create = &table_hash_params;
+ }
+
+ status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Connecting input ports to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_connect_to_table(p->p,
+ p->port_in_id[i],
+ p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Enable input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_enable(p->p,
+ p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Check pipeline consistency */
+ if (rte_pipeline_check(p->p) < 0) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Message queues */
+ p->n_msgq = params->n_msgq;
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_in[i] = params->msgq_in[i];
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_out[i] = params->msgq_out[i];
+
+ /* Message handlers */
+ memcpy(p->handlers, handlers, sizeof(p->handlers));
+ memcpy(p_fc->custom_handlers,
+ custom_handlers,
+ sizeof(p_fc->custom_handlers));
+
+ return p;
+}
+
+static int
+pipeline_fc_free(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return 0;
+}
+
+static int
+pipeline_fc_track(void *pipeline,
+	uint32_t port_in,
+ uint32_t *port_out)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if ((p == NULL) ||
+ (port_in >= p->n_ports_in) ||
+ (port_out == NULL))
+ return -1;
+
+ if (p->n_ports_in == 1) {
+ *port_out = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int
+pipeline_fc_timer(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ pipeline_msg_req_handle(p);
+ rte_pipeline_flush(p->p);
+
+ return 0;
+}
+
+static void *
+pipeline_fc_msg_req_custom_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_flow_classification *p_fc =
+ (struct pipeline_flow_classification *) p;
+ struct pipeline_custom_msg_req *req = msg;
+ pipeline_msg_req_handler f_handle;
+
+ f_handle = (req->subtype < PIPELINE_FC_MSG_REQS) ?
+ p_fc->custom_handlers[req->subtype] :
+ pipeline_msg_req_invalid_handler;
+
+ if (f_handle == NULL)
+ f_handle = pipeline_msg_req_invalid_handler;
+
+ return f_handle(p, req);
+}
+
+static void *
+pipeline_fc_msg_req_add_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_fc_add_msg_req *req = msg;
+ struct pipeline_fc_add_msg_rsp *rsp = msg;
+
+ struct flow_table_entry entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+ .flow_id = req->flow_id,
+ };
+
+ rsp->status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &req->key,
+ (struct rte_pipeline_table_entry *) &entry,
+ &rsp->key_found,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+static void *
+pipeline_fc_msg_req_add_bulk_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_fc_add_bulk_msg_req *req = msg;
+ struct pipeline_fc_add_bulk_msg_rsp *rsp = msg;
+ uint32_t i;
+
+ for (i = 0; i < req->n_keys; i++) {
+ struct pipeline_fc_add_bulk_flow_req *flow_req = &req->req[i];
+ struct pipeline_fc_add_bulk_flow_rsp *flow_rsp = &req->rsp[i];
+
+ struct flow_table_entry entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[flow_req->port_id]},
+ },
+ .flow_id = flow_req->flow_id,
+ };
+
+ int status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &flow_req->key,
+ (struct rte_pipeline_table_entry *) &entry,
+ &flow_rsp->key_found,
+ (struct rte_pipeline_table_entry **)
+ &flow_rsp->entry_ptr);
+
+ if (status)
+ break;
+ }
+
+ rsp->n_keys = i;
+
+ return rsp;
+}
+
+static void *
+pipeline_fc_msg_req_del_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_fc_del_msg_req *req = msg;
+ struct pipeline_fc_del_msg_rsp *rsp = msg;
+
+ rsp->status = rte_pipeline_table_entry_delete(p->p,
+ p->table_id[0],
+ &req->key,
+ &rsp->key_found,
+ NULL);
+
+ return rsp;
+}
+
+static void *
+pipeline_fc_msg_req_add_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_fc_add_default_msg_req *req = msg;
+ struct pipeline_fc_add_default_msg_rsp *rsp = msg;
+
+ struct flow_table_entry default_entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+
+ .flow_id = 0,
+ };
+
+ rsp->status = rte_pipeline_table_default_entry_add(p->p,
+ p->table_id[0],
+ (struct rte_pipeline_table_entry *) &default_entry,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+static void *
+pipeline_fc_msg_req_del_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_fc_del_default_msg_rsp *rsp = msg;
+
+ rsp->status = rte_pipeline_table_default_entry_delete(p->p,
+ p->table_id[0],
+ NULL);
+
+ return rsp;
+}
+
+struct pipeline_be_ops pipeline_flow_classification_be_ops = {
+ .f_init = pipeline_fc_init,
+ .f_free = pipeline_fc_free,
+ .f_run = NULL,
+ .f_timer = pipeline_fc_timer,
+ .f_track = pipeline_fc_track,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h
new file mode 100644
index 00000000..d8129b21
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_flow_classification_be.h
@@ -0,0 +1,142 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_BE_H__
+#define __INCLUDE_PIPELINE_FLOW_CLASSIFICATION_BE_H__
+
+#include "pipeline_common_be.h"
+
+enum pipeline_fc_msg_req_type {
+ PIPELINE_FC_MSG_REQ_FLOW_ADD = 0,
+ PIPELINE_FC_MSG_REQ_FLOW_ADD_BULK,
+ PIPELINE_FC_MSG_REQ_FLOW_DEL,
+ PIPELINE_FC_MSG_REQ_FLOW_ADD_DEFAULT,
+ PIPELINE_FC_MSG_REQ_FLOW_DEL_DEFAULT,
+ PIPELINE_FC_MSG_REQS,
+};
+
+#ifndef PIPELINE_FC_FLOW_KEY_MAX_SIZE
+#define PIPELINE_FC_FLOW_KEY_MAX_SIZE 64
+#endif
+
+/*
+ * MSG ADD
+ */
+struct pipeline_fc_add_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fc_msg_req_type subtype;
+
+ uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE];
+
+ uint32_t port_id;
+ uint32_t flow_id;
+};
+
+struct pipeline_fc_add_msg_rsp {
+ int status;
+ int key_found;
+ void *entry_ptr;
+};
+
+/*
+ * MSG ADD BULK
+ */
+struct pipeline_fc_add_bulk_flow_req {
+ uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE];
+ uint32_t port_id;
+ uint32_t flow_id;
+};
+
+struct pipeline_fc_add_bulk_flow_rsp {
+ int key_found;
+ void *entry_ptr;
+};
+
+struct pipeline_fc_add_bulk_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fc_msg_req_type subtype;
+
+ struct pipeline_fc_add_bulk_flow_req *req;
+ struct pipeline_fc_add_bulk_flow_rsp *rsp;
+ uint32_t n_keys;
+};
+
+struct pipeline_fc_add_bulk_msg_rsp {
+ uint32_t n_keys;
+};
+
+/*
+ * MSG DEL
+ */
+struct pipeline_fc_del_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fc_msg_req_type subtype;
+
+ uint8_t key[PIPELINE_FC_FLOW_KEY_MAX_SIZE];
+};
+
+struct pipeline_fc_del_msg_rsp {
+ int status;
+ int key_found;
+};
+
+/*
+ * MSG ADD DEFAULT
+ */
+struct pipeline_fc_add_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fc_msg_req_type subtype;
+
+ uint32_t port_id;
+};
+
+struct pipeline_fc_add_default_msg_rsp {
+ int status;
+ void *entry_ptr;
+};
+
+/*
+ * MSG DEL DEFAULT
+ */
+struct pipeline_fc_del_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_fc_msg_req_type subtype;
+};
+
+struct pipeline_fc_del_default_msg_rsp {
+ int status;
+};
+
+extern struct pipeline_be_ops pipeline_flow_classification_be_ops;
+
+#endif
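
Note: these request/response structures are exchanged over the pipeline message queues, and the back-end handlers above reuse the request buffer for the response (req and rsp alias the same msg pointer). A rough sketch of how the front end might fill an add request before posting it (the message buffer allocation and the send/receive helpers are application-specific and not shown):

	static void
	fill_fc_add_request(struct pipeline_fc_add_msg_req *req,
		void *raw_key, size_t key_size,
		uint32_t port_id, uint32_t flow_id)
	{
		req->type = PIPELINE_MSG_REQ_CUSTOM;
		req->subtype = PIPELINE_FC_MSG_REQ_FLOW_ADD;
		memcpy(req->key, raw_key, key_size); /* key already masked/byte-ordered */
		req->port_id = port_id;
		req->flow_id = flow_id;
	}

	/* The caller would post req on the pipeline's msgq_in, wait for the reply
	 * on msgq_out, then read ((struct pipeline_fc_add_msg_rsp *)req)->status. */
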
diff --git a/examples/ip_pipeline/pipeline/pipeline_master.c b/examples/ip_pipeline/pipeline/pipeline_master.c
new file mode 100644
index 00000000..1ccdad14
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_master.c
@@ -0,0 +1,47 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "pipeline_master.h"
+#include "pipeline_master_be.h"
+
+static struct pipeline_fe_ops pipeline_master_fe_ops = {
+ .f_init = NULL,
+ .f_free = NULL,
+ .cmds = NULL,
+};
+
+struct pipeline_type pipeline_master = {
+ .name = "MASTER",
+ .be_ops = &pipeline_master_be_ops,
+ .fe_ops = &pipeline_master_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_master.h b/examples/ip_pipeline/pipeline/pipeline_master.h
new file mode 100644
index 00000000..3fe3030f
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_master.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_MASTER_H__
+#define __INCLUDE_PIPELINE_MASTER_H__
+
+#include "pipeline.h"
+
+extern struct pipeline_type pipeline_master;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_master_be.c b/examples/ip_pipeline/pipeline/pipeline_master_be.c
new file mode 100644
index 00000000..ac0cbbc5
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_master_be.c
@@ -0,0 +1,150 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "app.h"
+#include "pipeline_master_be.h"
+
+struct pipeline_master {
+ struct app_params *app;
+ struct cmdline *cl;
+ int script_file_done;
+} __rte_cache_aligned;
+
+static void*
+pipeline_init(__rte_unused struct pipeline_params *params, void *arg)
+{
+ struct app_params *app = (struct app_params *) arg;
+ struct pipeline_master *p;
+ uint32_t size;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_master));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+
+ /* Initialization */
+ p->app = app;
+
+ p->cl = cmdline_stdin_new(app->cmds, "pipeline> ");
+ if (p->cl == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+
+ p->script_file_done = 0;
+ if (app->script_file == NULL)
+ p->script_file_done = 1;
+
+ return (void *) p;
+}
+
+static int
+pipeline_free(void *pipeline)
+{
+ struct pipeline_master *p = (struct pipeline_master *) pipeline;
+
+ if (p == NULL)
+ return -EINVAL;
+
+ cmdline_stdin_exit(p->cl);
+ rte_free(p);
+
+ return 0;
+}
+
+static int
+pipeline_run(void *pipeline)
+{
+ struct pipeline_master *p = (struct pipeline_master *) pipeline;
+ int status;
+
+ if (p->script_file_done == 0) {
+ struct app_params *app = p->app;
+ int fd = open(app->script_file, O_RDONLY);
+
+ if (fd < 0)
+ printf("Cannot open CLI script file \"%s\"\n",
+ app->script_file);
+ else {
+ struct cmdline *file_cl;
+
+ printf("Running CLI script file \"%s\" ...\n",
+ app->script_file);
+			file_cl = cmdline_new(p->cl->ctx, "", fd, 1);
+			if (file_cl != NULL) {
+				cmdline_interact(file_cl);
+				cmdline_free(file_cl);
+			}
+			close(fd);
+ }
+
+ p->script_file_done = 1;
+ }
+
+ status = cmdline_poll(p->cl);
+ if (status < 0)
+ rte_panic("CLI poll error (%" PRId32 ")\n", status);
+ else if (status == RDLINE_EXITED) {
+ cmdline_stdin_exit(p->cl);
+ rte_exit(0, "Bye!\n");
+ }
+
+ return 0;
+}
+
+static int
+pipeline_timer(__rte_unused void *pipeline)
+{
+ return 0;
+}
+
+struct pipeline_be_ops pipeline_master_be_ops = {
+ .f_init = pipeline_init,
+ .f_free = pipeline_free,
+ .f_run = pipeline_run,
+ .f_timer = pipeline_timer,
+ .f_track = NULL,
+};
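
Note: the master back end carries no packet traffic: f_run executes the optional CLI script once and then polls the interactive command line, while f_timer is a no-op. A hedged sketch of how the owning thread presumably drives these hooks (the real dispatch loop lives elsewhere in the application and may interleave other work):

	/* hypothetical loop for the lcore that owns the MASTER pipeline */
	for ( ; ; ) {
		pipeline_master_be_ops.f_run(master_pipeline);   /* poll one CLI line */
		pipeline_master_be_ops.f_timer(master_pipeline); /* currently does nothing */
	}
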
diff --git a/examples/ip_pipeline/pipeline/pipeline_master_be.h b/examples/ip_pipeline/pipeline/pipeline_master_be.h
new file mode 100644
index 00000000..00b71fe8
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_master_be.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_MASTER_BE_H__
+#define __INCLUDE_PIPELINE_MASTER_BE_H__
+
+#include "pipeline_common_be.h"
+
+extern struct pipeline_be_ops pipeline_master_be_ops;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough.c b/examples/ip_pipeline/pipeline/pipeline_passthrough.c
new file mode 100644
index 00000000..fc2cae5e
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_passthrough.c
@@ -0,0 +1,47 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "pipeline_passthrough.h"
+#include "pipeline_passthrough_be.h"
+
+static struct pipeline_fe_ops pipeline_passthrough_fe_ops = {
+ .f_init = NULL,
+ .f_free = NULL,
+ .cmds = NULL,
+};
+
+struct pipeline_type pipeline_passthrough = {
+ .name = "PASS-THROUGH",
+ .be_ops = &pipeline_passthrough_be_ops,
+ .fe_ops = &pipeline_passthrough_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough.h b/examples/ip_pipeline/pipeline/pipeline_passthrough.h
new file mode 100644
index 00000000..420a8768
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_passthrough.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_PASSTHROUGH_H__
+#define __INCLUDE_PIPELINE_PASSTHROUGH_H__
+
+#include "pipeline.h"
+
+extern struct pipeline_type pipeline_passthrough;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c
new file mode 100644
index 00000000..a0d11aea
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.c
@@ -0,0 +1,804 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_byteorder.h>
+#include <rte_table_stub.h>
+#include <rte_table_hash.h>
+#include <rte_pipeline.h>
+
+#include "pipeline_passthrough_be.h"
+#include "pipeline_actions_common.h"
+#include "parser.h"
+#include "hash_func.h"
+
+struct pipeline_passthrough {
+ struct pipeline p;
+ struct pipeline_passthrough_params params;
+ rte_table_hash_op_hash f_hash;
+} __rte_cache_aligned;
+
+static pipeline_msg_req_handler handlers[] = {
+ [PIPELINE_MSG_REQ_PING] =
+ pipeline_msg_req_ping_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_IN] =
+ pipeline_msg_req_stats_port_in_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_OUT] =
+ pipeline_msg_req_stats_port_out_handler,
+ [PIPELINE_MSG_REQ_STATS_TABLE] =
+ pipeline_msg_req_stats_table_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_ENABLE] =
+ pipeline_msg_req_port_in_enable_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_DISABLE] =
+ pipeline_msg_req_port_in_disable_handler,
+ [PIPELINE_MSG_REQ_CUSTOM] =
+ pipeline_msg_req_invalid_handler,
+};
+
+static inline __attribute__((always_inline)) void
+pkt_work(
+ struct rte_mbuf *pkt,
+ void *arg,
+ uint32_t dma_size,
+ uint32_t hash_enabled,
+ uint32_t lb_hash,
+ uint32_t port_out_pow2)
+{
+ struct pipeline_passthrough *p = arg;
+
+ uint64_t *dma_dst = RTE_MBUF_METADATA_UINT64_PTR(pkt,
+ p->params.dma_dst_offset);
+ uint64_t *dma_src = RTE_MBUF_METADATA_UINT64_PTR(pkt,
+ p->params.dma_src_offset);
+ uint64_t *dma_mask = (uint64_t *) p->params.dma_src_mask;
+ uint32_t *dma_hash = RTE_MBUF_METADATA_UINT32_PTR(pkt,
+ p->params.dma_hash_offset);
+ uint32_t i;
+
+ /* Read (dma_src), compute (dma_dst), write (dma_dst) */
+ for (i = 0; i < (dma_size / 8); i++)
+ dma_dst[i] = dma_src[i] & dma_mask[i];
+
+ /* Read (dma_dst), compute (hash), write (hash) */
+ if (hash_enabled) {
+ uint32_t hash = p->f_hash(dma_dst, dma_size, 0);
+ *dma_hash = hash;
+
+ if (lb_hash) {
+ uint32_t port_out;
+
+ if (port_out_pow2)
+ port_out
+ = hash & (p->p.n_ports_out - 1);
+ else
+ port_out
+ = hash % p->p.n_ports_out;
+
+ rte_pipeline_port_out_packet_insert(p->p.p,
+ port_out, pkt);
+ }
+ }
+}
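
Note: when lb_hash is set, the output port is derived from the freshly computed hash; the port_out_pow2 variant replaces the modulo with a bit mask, which is only correct when the number of output ports is a power of two (get_port_in_ah() below picks the variant once at init time using rte_is_power_of_2()). For example, with four output ports the two forms agree but the mask is cheaper:

	port_out = hash & (4 - 1); /* pow2 variant */
	port_out = hash % 4;       /* generic variant */
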
+
+static inline __attribute__((always_inline)) void
+pkt4_work(
+ struct rte_mbuf **pkts,
+ void *arg,
+ uint32_t dma_size,
+ uint32_t hash_enabled,
+ uint32_t lb_hash,
+ uint32_t port_out_pow2)
+{
+ struct pipeline_passthrough *p = arg;
+
+ uint64_t *dma_dst0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ p->params.dma_dst_offset);
+ uint64_t *dma_dst1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ p->params.dma_dst_offset);
+ uint64_t *dma_dst2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ p->params.dma_dst_offset);
+ uint64_t *dma_dst3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ p->params.dma_dst_offset);
+
+ uint64_t *dma_src0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ p->params.dma_src_offset);
+ uint64_t *dma_src1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ p->params.dma_src_offset);
+ uint64_t *dma_src2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ p->params.dma_src_offset);
+ uint64_t *dma_src3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ p->params.dma_src_offset);
+
+ uint64_t *dma_mask = (uint64_t *) p->params.dma_src_mask;
+
+ uint32_t *dma_hash0 = RTE_MBUF_METADATA_UINT32_PTR(pkts[0],
+ p->params.dma_hash_offset);
+ uint32_t *dma_hash1 = RTE_MBUF_METADATA_UINT32_PTR(pkts[1],
+ p->params.dma_hash_offset);
+ uint32_t *dma_hash2 = RTE_MBUF_METADATA_UINT32_PTR(pkts[2],
+ p->params.dma_hash_offset);
+ uint32_t *dma_hash3 = RTE_MBUF_METADATA_UINT32_PTR(pkts[3],
+ p->params.dma_hash_offset);
+
+ uint32_t i;
+
+ /* Read (dma_src), compute (dma_dst), write (dma_dst) */
+ for (i = 0; i < (dma_size / 8); i++) {
+ dma_dst0[i] = dma_src0[i] & dma_mask[i];
+ dma_dst1[i] = dma_src1[i] & dma_mask[i];
+ dma_dst2[i] = dma_src2[i] & dma_mask[i];
+ dma_dst3[i] = dma_src3[i] & dma_mask[i];
+ }
+
+ /* Read (dma_dst), compute (hash), write (hash) */
+ if (hash_enabled) {
+ uint32_t hash0 = p->f_hash(dma_dst0, dma_size, 0);
+ uint32_t hash1 = p->f_hash(dma_dst1, dma_size, 0);
+ uint32_t hash2 = p->f_hash(dma_dst2, dma_size, 0);
+ uint32_t hash3 = p->f_hash(dma_dst3, dma_size, 0);
+
+ *dma_hash0 = hash0;
+ *dma_hash1 = hash1;
+ *dma_hash2 = hash2;
+ *dma_hash3 = hash3;
+
+ if (lb_hash) {
+ uint32_t port_out0, port_out1, port_out2, port_out3;
+
+ if (port_out_pow2) {
+ port_out0
+ = hash0 & (p->p.n_ports_out - 1);
+ port_out1
+ = hash1 & (p->p.n_ports_out - 1);
+ port_out2
+ = hash2 & (p->p.n_ports_out - 1);
+ port_out3
+ = hash3 & (p->p.n_ports_out - 1);
+ } else {
+ port_out0
+ = hash0 % p->p.n_ports_out;
+ port_out1
+ = hash1 % p->p.n_ports_out;
+ port_out2
+ = hash2 % p->p.n_ports_out;
+ port_out3
+ = hash3 % p->p.n_ports_out;
+ }
+ rte_pipeline_port_out_packet_insert(p->p.p,
+ port_out0, pkts[0]);
+ rte_pipeline_port_out_packet_insert(p->p.p,
+ port_out1, pkts[1]);
+ rte_pipeline_port_out_packet_insert(p->p.p,
+ port_out2, pkts[2]);
+ rte_pipeline_port_out_packet_insert(p->p.p,
+ port_out3, pkts[3]);
+ }
+ }
+}
+
+#define PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+static inline void \
+pkt_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2( \
+ struct rte_mbuf *pkt, \
+ void *arg) \
+{ \
+ pkt_work(pkt, arg, dma_size, hash_enabled, lb_hash, port_pow2); \
+}
+
+#define PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+static inline void \
+pkt4_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2( \
+ struct rte_mbuf **pkts, \
+ void *arg) \
+{ \
+ pkt4_work(pkts, arg, dma_size, hash_enabled, lb_hash, port_pow2); \
+}
+
+#define port_in_ah(dma_size, hash_enabled, lb_hash, port_pow2) \
+PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+PIPELINE_PORT_IN_AH(port_in_ah_size##dma_size##_hash \
+ ##hash_enabled##_lb##lb_hash##_pw##port_pow2, \
+ pkt_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2, \
+ pkt4_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2)
+
+
+#define port_in_ah_lb(dma_size, hash_enabled, lb_hash, port_pow2) \
+PKT_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+PKT4_WORK(dma_size, hash_enabled, lb_hash, port_pow2) \
+PIPELINE_PORT_IN_AH_HIJACK_ALL( \
+ port_in_ah_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2, \
+ pkt_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2, \
+ pkt4_work_size##dma_size##_hash##hash_enabled \
+ ##_lb##lb_hash##_pw##port_pow2)
+
+/* Port in AH (dma_size, hash_enabled, lb_hash, port_pow2) */
+
+port_in_ah(8, 0, 0, 0)
+port_in_ah(8, 1, 0, 0)
+port_in_ah_lb(8, 1, 1, 0)
+port_in_ah_lb(8, 1, 1, 1)
+
+port_in_ah(16, 0, 0, 0)
+port_in_ah(16, 1, 0, 0)
+port_in_ah_lb(16, 1, 1, 0)
+port_in_ah_lb(16, 1, 1, 1)
+
+port_in_ah(24, 0, 0, 0)
+port_in_ah(24, 1, 0, 0)
+port_in_ah_lb(24, 1, 1, 0)
+port_in_ah_lb(24, 1, 1, 1)
+
+port_in_ah(32, 0, 0, 0)
+port_in_ah(32, 1, 0, 0)
+port_in_ah_lb(32, 1, 1, 0)
+port_in_ah_lb(32, 1, 1, 1)
+
+port_in_ah(40, 0, 0, 0)
+port_in_ah(40, 1, 0, 0)
+port_in_ah_lb(40, 1, 1, 0)
+port_in_ah_lb(40, 1, 1, 1)
+
+port_in_ah(48, 0, 0, 0)
+port_in_ah(48, 1, 0, 0)
+port_in_ah_lb(48, 1, 1, 0)
+port_in_ah_lb(48, 1, 1, 1)
+
+port_in_ah(56, 0, 0, 0)
+port_in_ah(56, 1, 0, 0)
+port_in_ah_lb(56, 1, 1, 0)
+port_in_ah_lb(56, 1, 1, 1)
+
+port_in_ah(64, 0, 0, 0)
+port_in_ah(64, 1, 0, 0)
+port_in_ah_lb(64, 1, 1, 0)
+port_in_ah_lb(64, 1, 1, 1)
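
Note: the PKT_WORK/PKT4_WORK/port_in_ah macros stamp out one specialized input-port action handler per (dma_size, hash_enabled, lb_hash, port_pow2) combination, so the per-packet branches on these parameters are resolved at compile time and get_port_in_ah() below only has to pick the right symbol. For illustration, port_in_ah(8, 0, 0, 0) above expands roughly to:

	static inline void
	pkt_work_size8_hash0_lb0_pw0(struct rte_mbuf *pkt, void *arg)
	{
		pkt_work(pkt, arg, 8, 0, 0, 0);
	}

	static inline void
	pkt4_work_size8_hash0_lb0_pw0(struct rte_mbuf **pkts, void *arg)
	{
		pkt4_work(pkts, arg, 8, 0, 0, 0);
	}

	/* plus a PIPELINE_PORT_IN_AH() instantiation named
	 * port_in_ah_size8_hash0_lb0_pw0 that wires the two together */
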
+
+static rte_pipeline_port_in_action_handler
+get_port_in_ah(struct pipeline_passthrough *p)
+{
+ if (p->params.dma_enabled == 0)
+ return NULL;
+
+ if (p->params.dma_hash_enabled) {
+ if (p->params.lb_hash_enabled) {
+ if (rte_is_power_of_2(p->p.n_ports_out))
+ switch (p->params.dma_size) {
+
+ case 8: return port_in_ah_size8_hash1_lb1_pw1;
+ case 16: return port_in_ah_size16_hash1_lb1_pw1;
+ case 24: return port_in_ah_size24_hash1_lb1_pw1;
+ case 32: return port_in_ah_size32_hash1_lb1_pw1;
+ case 40: return port_in_ah_size40_hash1_lb1_pw1;
+ case 48: return port_in_ah_size48_hash1_lb1_pw1;
+ case 56: return port_in_ah_size56_hash1_lb1_pw1;
+ case 64: return port_in_ah_size64_hash1_lb1_pw1;
+ default: return NULL;
+ }
+ else
+ switch (p->params.dma_size) {
+
+ case 8: return port_in_ah_size8_hash1_lb1_pw0;
+ case 16: return port_in_ah_size16_hash1_lb1_pw0;
+ case 24: return port_in_ah_size24_hash1_lb1_pw0;
+ case 32: return port_in_ah_size32_hash1_lb1_pw0;
+ case 40: return port_in_ah_size40_hash1_lb1_pw0;
+ case 48: return port_in_ah_size48_hash1_lb1_pw0;
+ case 56: return port_in_ah_size56_hash1_lb1_pw0;
+ case 64: return port_in_ah_size64_hash1_lb1_pw0;
+ default: return NULL;
+ }
+ } else
+ switch (p->params.dma_size) {
+
+ case 8: return port_in_ah_size8_hash1_lb0_pw0;
+ case 16: return port_in_ah_size16_hash1_lb0_pw0;
+ case 24: return port_in_ah_size24_hash1_lb0_pw0;
+ case 32: return port_in_ah_size32_hash1_lb0_pw0;
+ case 40: return port_in_ah_size40_hash1_lb0_pw0;
+ case 48: return port_in_ah_size48_hash1_lb0_pw0;
+ case 56: return port_in_ah_size56_hash1_lb0_pw0;
+ case 64: return port_in_ah_size64_hash1_lb0_pw0;
+ default: return NULL;
+ }
+ } else
+ switch (p->params.dma_size) {
+
+ case 8: return port_in_ah_size8_hash0_lb0_pw0;
+ case 16: return port_in_ah_size16_hash0_lb0_pw0;
+ case 24: return port_in_ah_size24_hash0_lb0_pw0;
+ case 32: return port_in_ah_size32_hash0_lb0_pw0;
+ case 40: return port_in_ah_size40_hash0_lb0_pw0;
+ case 48: return port_in_ah_size48_hash0_lb0_pw0;
+ case 56: return port_in_ah_size56_hash0_lb0_pw0;
+ case 64: return port_in_ah_size64_hash0_lb0_pw0;
+ default: return NULL;
+ }
+}
+
+int
+pipeline_passthrough_parse_args(struct pipeline_passthrough_params *p,
+ struct pipeline_params *params)
+{
+ uint32_t dma_dst_offset_present = 0;
+ uint32_t dma_src_offset_present = 0;
+ uint32_t dma_src_mask_present = 0;
+ uint32_t dma_size_present = 0;
+ uint32_t dma_hash_offset_present = 0;
+ uint32_t lb_present = 0;
+ uint32_t i;
+ char dma_mask_str[PIPELINE_PASSTHROUGH_DMA_SIZE_MAX * 2];
+
+ /* default values */
+ p->dma_enabled = 0;
+ p->dma_hash_enabled = 0;
+ p->lb_hash_enabled = 0;
+ memset(p->dma_src_mask, 0xFF, sizeof(p->dma_src_mask));
+
+ for (i = 0; i < params->n_args; i++) {
+ char *arg_name = params->args_name[i];
+ char *arg_value = params->args_value[i];
+
+ /* dma_dst_offset */
+ if (strcmp(arg_name, "dma_dst_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dma_dst_offset_present == 0, params->name,
+ arg_name);
+ dma_dst_offset_present = 1;
+
+ status = parser_read_uint32(&p->dma_dst_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ p->dma_enabled = 1;
+
+ continue;
+ }
+
+ /* dma_src_offset */
+ if (strcmp(arg_name, "dma_src_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dma_src_offset_present == 0, params->name,
+ arg_name);
+ dma_src_offset_present = 1;
+
+ status = parser_read_uint32(&p->dma_src_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ p->dma_enabled = 1;
+
+ continue;
+ }
+
+ /* dma_size */
+ if (strcmp(arg_name, "dma_size") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dma_size_present == 0, params->name,
+ arg_name);
+ dma_size_present = 1;
+
+ status = parser_read_uint32(&p->dma_size,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->dma_size != 0) &&
+ ((p->dma_size % 8) == 0)),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG(((status != -ERANGE) &&
+ (p->dma_size <=
+ PIPELINE_PASSTHROUGH_DMA_SIZE_MAX)),
+ params->name, arg_name, arg_value);
+
+ p->dma_enabled = 1;
+
+ continue;
+ }
+
+ /* dma_src_mask */
+ if (strcmp(arg_name, "dma_src_mask") == 0) {
+ int mask_str_len = strlen(arg_value);
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dma_src_mask_present == 0,
+ params->name, arg_name);
+ dma_src_mask_present = 1;
+
+ PIPELINE_ARG_CHECK((mask_str_len <
+ (PIPELINE_PASSTHROUGH_DMA_SIZE_MAX * 2)),
+ "Parse error in section \"%s\": entry "
+ "\"%s\" too long", params->name,
+ arg_name);
+
+ snprintf(dma_mask_str, mask_str_len + 1,
+ "%s", arg_value);
+
+ p->dma_enabled = 1;
+
+ continue;
+ }
+
+ /* dma_hash_offset */
+ if (strcmp(arg_name, "dma_hash_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dma_hash_offset_present == 0,
+ params->name, arg_name);
+ dma_hash_offset_present = 1;
+
+ status = parser_read_uint32(&p->dma_hash_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ p->dma_hash_enabled = 1;
+ p->dma_enabled = 1;
+
+ continue;
+ }
+
+ /* load_balance mode */
+ if (strcmp(arg_name, "lb") == 0) {
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ lb_present == 0,
+ params->name, arg_name);
+ lb_present = 1;
+
+ if ((strcmp(arg_value, "hash") == 0) ||
+ (strcmp(arg_value, "HASH") == 0))
+ p->lb_hash_enabled = 1;
+ else
+ PIPELINE_PARSE_ERR_INV_VAL(0,
+ params->name,
+ arg_name,
+ arg_value);
+
+ continue;
+ }
+
+ /* any other */
+ PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name);
+ }
+
+ /* Check correlations between arguments */
+ PIPELINE_ARG_CHECK((dma_dst_offset_present == p->dma_enabled),
+ "Parse error in section \"%s\": missing entry "
+ "\"dma_dst_offset\"", params->name);
+ PIPELINE_ARG_CHECK((dma_src_offset_present == p->dma_enabled),
+ "Parse error in section \"%s\": missing entry "
+ "\"dma_src_offset\"", params->name);
+ PIPELINE_ARG_CHECK((dma_size_present == p->dma_enabled),
+ "Parse error in section \"%s\": missing entry "
+ "\"dma_size\"", params->name);
+ PIPELINE_ARG_CHECK((dma_hash_offset_present == p->dma_enabled),
+ "Parse error in section \"%s\": missing entry "
+ "\"dma_hash_offset\"", params->name);
+ PIPELINE_ARG_CHECK((p->lb_hash_enabled <= p->dma_hash_enabled),
+ "Parse error in section \"%s\": missing entry "
+ "\"dma_hash_offset\"", params->name);
+
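+	/* Convert the hex mask string into the byte-level DMA source mask. */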
+ if (dma_src_mask_present) {
+ uint32_t dma_size = p->dma_size;
+ int status;
+
+ PIPELINE_ARG_CHECK((strlen(dma_mask_str) ==
+ (dma_size * 2)), "Parse error in section "
+ "\"%s\": dma_src_mask should have exactly %u hex "
+ "digits", params->name, (dma_size * 2));
+
+ status = parse_hex_string(dma_mask_str, p->dma_src_mask,
+ &p->dma_size);
+
+ PIPELINE_PARSE_ERR_INV_VAL(((status == 0) &&
+ (dma_size == p->dma_size)), params->name,
+ "dma_src_mask", dma_mask_str);
+ }
+
+ return 0;
+}
+
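+/* Select the precomputed hash function matching the configured DMA size. */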
+static rte_table_hash_op_hash
+get_hash_function(struct pipeline_passthrough *p)
+{
+ switch (p->params.dma_size) {
+
+ case 8: return hash_default_key8;
+ case 16: return hash_default_key16;
+ case 24: return hash_default_key24;
+ case 32: return hash_default_key32;
+ case 40: return hash_default_key40;
+ case 48: return hash_default_key48;
+ case 56: return hash_default_key56;
+ case 64: return hash_default_key64;
+ default: return NULL;
+ }
+}
+
+static void*
+pipeline_passthrough_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline *p;
+ struct pipeline_passthrough *p_pt;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0) ||
+ (params->n_ports_in < params->n_ports_out) ||
+ (params->n_ports_in % params->n_ports_out))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_passthrough));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ p_pt = (struct pipeline_passthrough *) p;
+ if (p == NULL)
+ return NULL;
+
+ strcpy(p->name, params->name);
+ p->log_level = params->log_level;
+
+ PLOG(p, HIGH, "Pass-through");
+
+ /* Parse arguments */
+	if (pipeline_passthrough_parse_args(&p_pt->params, params)) {
+		rte_free(p);
+		return NULL;
+	}
+ p_pt->f_hash = get_hash_function(p_pt);
+
+ /* Pipeline */
+ {
+ struct rte_pipeline_params pipeline_params = {
+ .name = "PASS-THROUGH",
+ .socket_id = params->socket_id,
+ .offset_port_id = 0,
+ };
+
+ p->p = rte_pipeline_create(&pipeline_params);
+ if (p->p == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ p->n_ports_in = params->n_ports_in;
+ p->n_ports_out = params->n_ports_out;
+ p->n_tables = p->n_ports_in;
+
+	/* Input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_port_in_params port_params = {
+ .ops = pipeline_port_in_params_get_ops(
+ &params->port_in[i]),
+ .arg_create = pipeline_port_in_params_convert(
+ &params->port_in[i]),
+ .f_action = get_port_in_ah(p_pt),
+ .arg_ah = p_pt,
+ .burst_size = params->port_in[i].burst_size,
+ };
+
+ int status = rte_pipeline_port_in_create(p->p,
+ &port_params,
+ &p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Output ports */
+ for (i = 0; i < p->n_ports_out; i++) {
+ struct rte_pipeline_port_out_params port_params = {
+ .ops = pipeline_port_out_params_get_ops(
+ &params->port_out[i]),
+ .arg_create = pipeline_port_out_params_convert(
+ &params->port_out[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ };
+
+ int status = rte_pipeline_port_out_create(p->p,
+ &port_params,
+ &p->port_out_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_table_params table_params = {
+ .ops = &rte_table_stub_ops,
+ .arg_create = NULL,
+ .f_action_hit = NULL,
+ .f_action_miss = NULL,
+ .arg_ah = NULL,
+ .action_data_size = 0,
+ };
+
+ int status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Connecting input ports to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_connect_to_table(p->p,
+ p->port_in_id[i],
+ p->table_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Add entries to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_table_entry default_entry = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[
+ i / (p->n_ports_in / p->n_ports_out)]},
+ };
+
+ struct rte_pipeline_table_entry *default_entry_ptr;
+
+ int status = rte_pipeline_table_default_entry_add(p->p,
+ p->table_id[i],
+ &default_entry,
+ &default_entry_ptr);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Enable input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_enable(p->p,
+ p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Check pipeline consistency */
+ if (rte_pipeline_check(p->p) < 0) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Message queues */
+ p->n_msgq = params->n_msgq;
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_in[i] = params->msgq_in[i];
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_out[i] = params->msgq_out[i];
+
+ /* Message handlers */
+ memcpy(p->handlers, handlers, sizeof(p->handlers));
+
+ return p;
+}
+
+static int
+pipeline_passthrough_free(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return 0;
+}
+
+static int
+pipeline_passthrough_timer(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ pipeline_msg_req_handle(p);
+ rte_pipeline_flush(p->p);
+
+ return 0;
+}
+
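+/*
+ * Map an input port to the output port it is statically wired to,
+ * consistent with the default table entries installed at init time.
+ */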
+static int
+pipeline_passthrough_track(void *pipeline, uint32_t port_in, uint32_t *port_out)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if ((p == NULL) ||
+ (port_in >= p->n_ports_in) ||
+ (port_out == NULL))
+ return -1;
+
+	*port_out = port_in / (p->n_ports_in / p->n_ports_out);
+ return 0;
+}
+
+struct pipeline_be_ops pipeline_passthrough_be_ops = {
+ .f_init = pipeline_passthrough_init,
+ .f_free = pipeline_passthrough_free,
+ .f_run = NULL,
+ .f_timer = pipeline_passthrough_timer,
+ .f_track = pipeline_passthrough_track,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h
new file mode 100644
index 00000000..9368cec7
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_passthrough_be.h
@@ -0,0 +1,59 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_PASSTHROUGH_BE_H__
+#define __INCLUDE_PIPELINE_PASSTHROUGH_BE_H__
+
+#include "pipeline_common_be.h"
+
+#define PIPELINE_PASSTHROUGH_DMA_SIZE_MAX 64
+
+struct pipeline_passthrough_params {
+ uint32_t dma_enabled;
+ uint32_t dma_dst_offset;
+ uint32_t dma_src_offset;
+ uint8_t dma_src_mask[PIPELINE_PASSTHROUGH_DMA_SIZE_MAX];
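+	/* Non-zero multiple of 8, at most PIPELINE_PASSTHROUGH_DMA_SIZE_MAX */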
+ uint32_t dma_size;
+
+ uint32_t dma_hash_enabled;
+ uint32_t dma_hash_offset;
+ uint32_t lb_hash_enabled;
+};
+
+int
+pipeline_passthrough_parse_args(struct pipeline_passthrough_params *p,
+ struct pipeline_params *params);
+
+extern struct pipeline_be_ops pipeline_passthrough_be_ops;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.c b/examples/ip_pipeline/pipeline/pipeline_routing.c
new file mode 100644
index 00000000..eab89f2e
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_routing.c
@@ -0,0 +1,2239 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "app.h"
+#include "pipeline_common_fe.h"
+#include "pipeline_routing.h"
+
+struct app_pipeline_routing_route {
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data data;
+ void *entry_ptr;
+
+ TAILQ_ENTRY(app_pipeline_routing_route) node;
+};
+
+struct app_pipeline_routing_arp_entry {
+ struct pipeline_routing_arp_key key;
+ struct ether_addr macaddr;
+ void *entry_ptr;
+
+ TAILQ_ENTRY(app_pipeline_routing_arp_entry) node;
+};
+
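+/* Front-end copy of the routing pipeline state (routes and ARP entries). */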
+struct pipeline_routing {
+ /* Parameters */
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+
+ /* Routes */
+ TAILQ_HEAD(, app_pipeline_routing_route) routes;
+ uint32_t n_routes;
+
+ uint32_t default_route_present;
+ uint32_t default_route_port_id;
+ void *default_route_entry_ptr;
+
+ /* ARP entries */
+ TAILQ_HEAD(, app_pipeline_routing_arp_entry) arp_entries;
+ uint32_t n_arp_entries;
+
+ uint32_t default_arp_entry_present;
+ uint32_t default_arp_entry_port_id;
+ void *default_arp_entry_ptr;
+};
+
+static void *
+pipeline_routing_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline_routing *p;
+ uint32_t size;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_routing));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ if (p == NULL)
+ return NULL;
+
+ /* Initialization */
+ p->n_ports_in = params->n_ports_in;
+ p->n_ports_out = params->n_ports_out;
+
+ TAILQ_INIT(&p->routes);
+ p->n_routes = 0;
+
+ TAILQ_INIT(&p->arp_entries);
+ p->n_arp_entries = 0;
+
+ return p;
+}
+
+static int
+app_pipeline_routing_free(void *pipeline)
+{
+ struct pipeline_routing *p = pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ while (!TAILQ_EMPTY(&p->routes)) {
+ struct app_pipeline_routing_route *route;
+
+ route = TAILQ_FIRST(&p->routes);
+ TAILQ_REMOVE(&p->routes, route, node);
+ rte_free(route);
+ }
+
+ while (!TAILQ_EMPTY(&p->arp_entries)) {
+ struct app_pipeline_routing_arp_entry *arp_entry;
+
+ arp_entry = TAILQ_FIRST(&p->arp_entries);
+ TAILQ_REMOVE(&p->arp_entries, arp_entry, node);
+ rte_free(arp_entry);
+ }
+
+ rte_free(p);
+ return 0;
+}
+
+static struct app_pipeline_routing_route *
+app_pipeline_routing_find_route(struct pipeline_routing *p,
+ const struct pipeline_routing_route_key *key)
+{
+ struct app_pipeline_routing_route *it, *found;
+
+ found = NULL;
+ TAILQ_FOREACH(it, &p->routes, node) {
+ if ((key->type == it->key.type) &&
+ (key->key.ipv4.ip == it->key.key.ipv4.ip) &&
+ (key->key.ipv4.depth == it->key.key.ipv4.depth)) {
+ found = it;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static struct app_pipeline_routing_arp_entry *
+app_pipeline_routing_find_arp_entry(struct pipeline_routing *p,
+ const struct pipeline_routing_arp_key *key)
+{
+ struct app_pipeline_routing_arp_entry *it, *found;
+
+ found = NULL;
+ TAILQ_FOREACH(it, &p->arp_entries, node) {
+ if ((key->type == it->key.type) &&
+ (key->key.ipv4.port_id == it->key.key.ipv4.port_id) &&
+ (key->key.ipv4.ip == it->key.key.ipv4.ip)) {
+ found = it;
+ break;
+ }
+ }
+
+ return found;
+}
+
+static void
+print_route(const struct app_pipeline_routing_route *route)
+{
+ if (route->key.type == PIPELINE_ROUTING_ROUTE_IPV4) {
+ const struct pipeline_routing_route_key_ipv4 *key =
+ &route->key.key.ipv4;
+
+ printf("IP Prefix = %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32 "/%" PRIu32
+ " => (Port = %" PRIu32,
+
+ (key->ip >> 24) & 0xFF,
+ (key->ip >> 16) & 0xFF,
+ (key->ip >> 8) & 0xFF,
+ key->ip & 0xFF,
+
+ key->depth,
+ route->data.port_id);
+
+ if (route->data.flags & PIPELINE_ROUTING_ROUTE_ARP)
+ printf(
+ ", Next Hop IP = %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32,
+
+ (route->data.ethernet.ip >> 24) & 0xFF,
+ (route->data.ethernet.ip >> 16) & 0xFF,
+ (route->data.ethernet.ip >> 8) & 0xFF,
+ route->data.ethernet.ip & 0xFF);
+ else
+ printf(
+ ", Next Hop HWaddress = %02" PRIx32
+ ":%02" PRIx32 ":%02" PRIx32
+ ":%02" PRIx32 ":%02" PRIx32
+ ":%02" PRIx32,
+
+ route->data.ethernet.macaddr.addr_bytes[0],
+ route->data.ethernet.macaddr.addr_bytes[1],
+ route->data.ethernet.macaddr.addr_bytes[2],
+ route->data.ethernet.macaddr.addr_bytes[3],
+ route->data.ethernet.macaddr.addr_bytes[4],
+ route->data.ethernet.macaddr.addr_bytes[5]);
+
+ if (route->data.flags & PIPELINE_ROUTING_ROUTE_QINQ)
+ printf(", QinQ SVLAN = %" PRIu32 " CVLAN = %" PRIu32,
+ route->data.l2.qinq.svlan,
+ route->data.l2.qinq.cvlan);
+
+ if (route->data.flags & PIPELINE_ROUTING_ROUTE_MPLS) {
+ uint32_t i;
+
+ printf(", MPLS labels");
+ for (i = 0; i < route->data.l2.mpls.n_labels; i++)
+ printf(" %" PRIu32,
+ route->data.l2.mpls.labels[i]);
+ }
+
+ printf(")\n");
+ }
+}
+
+static void
+print_arp_entry(const struct app_pipeline_routing_arp_entry *entry)
+{
+ printf("(Port = %" PRIu32 ", IP = %" PRIu32 ".%" PRIu32
+ ".%" PRIu32 ".%" PRIu32
+ ") => HWaddress = %02" PRIx32 ":%02" PRIx32 ":%02" PRIx32
+ ":%02" PRIx32 ":%02" PRIx32 ":%02" PRIx32 "\n",
+
+ entry->key.key.ipv4.port_id,
+ (entry->key.key.ipv4.ip >> 24) & 0xFF,
+ (entry->key.key.ipv4.ip >> 16) & 0xFF,
+ (entry->key.key.ipv4.ip >> 8) & 0xFF,
+ entry->key.key.ipv4.ip & 0xFF,
+
+ entry->macaddr.addr_bytes[0],
+ entry->macaddr.addr_bytes[1],
+ entry->macaddr.addr_bytes[2],
+ entry->macaddr.addr_bytes[3],
+ entry->macaddr.addr_bytes[4],
+ entry->macaddr.addr_bytes[5]);
+}
+
+static int
+app_pipeline_routing_route_ls(struct app_params *app, uint32_t pipeline_id)
+{
+ struct pipeline_routing *p;
+ struct app_pipeline_routing_route *it;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(it, &p->routes, node)
+ print_route(it);
+
+ if (p->default_route_present)
+ printf("Default route: port %" PRIu32 " (entry ptr = %p)\n",
+ p->default_route_port_id,
+ p->default_route_entry_ptr);
+ else
+ printf("Default: DROP\n");
+
+ return 0;
+}
+
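+/*
+ * Front-end route add: send the request to the routing pipeline back-end
+ * and, on success, mirror the new or updated entry in the local route list.
+ */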
+int
+app_pipeline_routing_add_route(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_route_key *key,
+ struct pipeline_routing_route_data *data)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_route_add_msg_req *req;
+ struct pipeline_routing_route_add_msg_rsp *rsp;
+
+ struct app_pipeline_routing_route *entry;
+
+ int new_entry;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL) ||
+ (data == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ switch (key->type) {
+ case PIPELINE_ROUTING_ROUTE_IPV4:
+ {
+ uint32_t depth = key->key.ipv4.depth;
+ uint32_t netmask;
+
+ /* key */
+ if ((depth == 0) || (depth > 32))
+ return -1;
+
+ netmask = (~0U) << (32 - depth);
+ key->key.ipv4.ip &= netmask;
+
+ /* data */
+ if (data->port_id >= p->n_ports_out)
+ return -1;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /* Find existing rule or allocate new rule */
+ entry = app_pipeline_routing_find_route(p, key);
+ new_entry = (entry == NULL);
+ if (entry == NULL) {
+ entry = rte_malloc(NULL, sizeof(*entry), RTE_CACHE_LINE_SIZE);
+
+ if (entry == NULL)
+ return -1;
+ }
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD;
+ memcpy(&req->key, key, sizeof(*key));
+ memcpy(&req->data, data, sizeof(*data));
+
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ /* Read response and write entry */
+ if (rsp->status ||
+ (rsp->entry_ptr == NULL) ||
+ ((new_entry == 0) && (rsp->key_found == 0)) ||
+ ((new_entry == 1) && (rsp->key_found == 1))) {
+ app_msg_free(app, rsp);
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ memcpy(&entry->key, key, sizeof(*key));
+ memcpy(&entry->data, data, sizeof(*data));
+ entry->entry_ptr = rsp->entry_ptr;
+
+ /* Commit entry */
+ if (new_entry) {
+ TAILQ_INSERT_TAIL(&p->routes, entry, node);
+ p->n_routes++;
+ }
+
+ print_route(entry);
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+ return 0;
+}
+
+int
+app_pipeline_routing_delete_route(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_route_key *key)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_route_delete_msg_req *req;
+ struct pipeline_routing_route_delete_msg_rsp *rsp;
+
+ struct app_pipeline_routing_route *entry;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ switch (key->type) {
+ case PIPELINE_ROUTING_ROUTE_IPV4:
+ {
+ uint32_t depth = key->key.ipv4.depth;
+ uint32_t netmask;
+
+ /* key */
+ if ((depth == 0) || (depth > 32))
+ return -1;
+
+ netmask = (~0U) << (32 - depth);
+ key->key.ipv4.ip &= netmask;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /* Find rule */
+ entry = app_pipeline_routing_find_route(p, key);
+ if (entry == NULL)
+ return 0;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL;
+ memcpy(&req->key, key, sizeof(*key));
+
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status || !rsp->key_found) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Remove route */
+ TAILQ_REMOVE(&p->routes, entry, node);
+ p->n_routes--;
+ rte_free(entry);
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_routing_add_default_route(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_route_add_default_msg_req *req;
+ struct pipeline_routing_route_add_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD_DEFAULT;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write route */
+ if (rsp->status || (rsp->entry_ptr == NULL)) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ p->default_route_port_id = port_id;
+ p->default_route_entry_ptr = rsp->entry_ptr;
+
+ /* Commit route */
+ p->default_route_present = 1;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_routing_delete_default_route(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_arp_delete_default_msg_req *req;
+ struct pipeline_routing_arp_delete_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL_DEFAULT;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write route */
+ if (rsp->status) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Commit route */
+ p->default_route_present = 0;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+static int
+app_pipeline_routing_arp_ls(struct app_params *app, uint32_t pipeline_id)
+{
+ struct pipeline_routing *p;
+ struct app_pipeline_routing_arp_entry *it;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -EINVAL;
+
+ TAILQ_FOREACH(it, &p->arp_entries, node)
+ print_arp_entry(it);
+
+ if (p->default_arp_entry_present)
+ printf("Default entry: port %" PRIu32 " (entry ptr = %p)\n",
+ p->default_arp_entry_port_id,
+ p->default_arp_entry_ptr);
+ else
+ printf("Default: DROP\n");
+
+ return 0;
+}
+
+int
+app_pipeline_routing_add_arp_entry(struct app_params *app, uint32_t pipeline_id,
+ struct pipeline_routing_arp_key *key,
+ struct ether_addr *macaddr)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_arp_add_msg_req *req;
+ struct pipeline_routing_arp_add_msg_rsp *rsp;
+
+ struct app_pipeline_routing_arp_entry *entry;
+
+ int new_entry;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL) ||
+ (macaddr == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ switch (key->type) {
+ case PIPELINE_ROUTING_ARP_IPV4:
+ {
+ uint32_t port_id = key->key.ipv4.port_id;
+
+ /* key */
+ if (port_id >= p->n_ports_out)
+ return -1;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /* Find existing entry or allocate new */
+ entry = app_pipeline_routing_find_arp_entry(p, key);
+ new_entry = (entry == NULL);
+ if (entry == NULL) {
+ entry = rte_malloc(NULL, sizeof(*entry), RTE_CACHE_LINE_SIZE);
+
+ if (entry == NULL)
+ return -1;
+ }
+
+ /* Message buffer allocation */
+ req = app_msg_alloc(app);
+ if (req == NULL) {
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_ADD;
+ memcpy(&req->key, key, sizeof(*key));
+ ether_addr_copy(macaddr, &req->macaddr);
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL) {
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ /* Read response and write entry */
+ if (rsp->status ||
+ (rsp->entry_ptr == NULL) ||
+ ((new_entry == 0) && (rsp->key_found == 0)) ||
+ ((new_entry == 1) && (rsp->key_found == 1))) {
+ app_msg_free(app, rsp);
+ if (new_entry)
+ rte_free(entry);
+ return -1;
+ }
+
+ memcpy(&entry->key, key, sizeof(*key));
+ ether_addr_copy(macaddr, &entry->macaddr);
+ entry->entry_ptr = rsp->entry_ptr;
+
+ /* Commit entry */
+ if (new_entry) {
+ TAILQ_INSERT_TAIL(&p->arp_entries, entry, node);
+ p->n_arp_entries++;
+ }
+
+ print_arp_entry(entry);
+
+ /* Message buffer free */
+ app_msg_free(app, rsp);
+ return 0;
+}
+
+int
+app_pipeline_routing_delete_arp_entry(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_arp_key *key)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_arp_delete_msg_req *req;
+ struct pipeline_routing_arp_delete_msg_rsp *rsp;
+
+ struct app_pipeline_routing_arp_entry *entry;
+
+ /* Check input arguments */
+ if ((app == NULL) ||
+ (key == NULL))
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -EINVAL;
+
+ switch (key->type) {
+ case PIPELINE_ROUTING_ARP_IPV4:
+ {
+ uint32_t port_id = key->key.ipv4.port_id;
+
+ /* key */
+ if (port_id >= p->n_ports_out)
+ return -1;
+ }
+ break;
+
+ default:
+ return -1;
+ }
+
+ /* Find rule */
+ entry = app_pipeline_routing_find_arp_entry(p, key);
+ if (entry == NULL)
+ return 0;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_DEL;
+ memcpy(&req->key, key, sizeof(*key));
+
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response */
+ if (rsp->status || !rsp->key_found) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ /* Remove entry */
+ TAILQ_REMOVE(&p->arp_entries, entry, node);
+ p->n_arp_entries--;
+ rte_free(entry);
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_routing_add_default_arp_entry(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_arp_add_default_msg_req *req;
+ struct pipeline_routing_arp_add_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -1;
+
+ if (port_id >= p->n_ports_out)
+ return -1;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_ADD_DEFAULT;
+ req->port_id = port_id;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ /* Read response and write entry */
+ if (rsp->status || rsp->entry_ptr == NULL) {
+ app_msg_free(app, rsp);
+ return -1;
+ }
+
+ p->default_arp_entry_port_id = port_id;
+ p->default_arp_entry_ptr = rsp->entry_ptr;
+
+ /* Commit entry */
+ p->default_arp_entry_present = 1;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+int
+app_pipeline_routing_delete_default_arp_entry(struct app_params *app,
+ uint32_t pipeline_id)
+{
+ struct pipeline_routing *p;
+
+ struct pipeline_routing_arp_delete_default_msg_req *req;
+ struct pipeline_routing_arp_delete_default_msg_rsp *rsp;
+
+ /* Check input arguments */
+ if (app == NULL)
+ return -1;
+
+ p = app_pipeline_data_fe(app, pipeline_id, &pipeline_routing);
+ if (p == NULL)
+ return -EINVAL;
+
+ /* Allocate and write request */
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -ENOMEM;
+
+ req->type = PIPELINE_MSG_REQ_CUSTOM;
+ req->subtype = PIPELINE_ROUTING_MSG_REQ_ARP_DEL_DEFAULT;
+
+ /* Send request and wait for response */
+ rsp = app_msg_send_recv(app, pipeline_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -ETIMEDOUT;
+
+ /* Read response and write entry */
+	if (rsp->status) {
+		int status = rsp->status;
+
+		app_msg_free(app, rsp);
+		return status;
+	}
+
+ /* Commit entry */
+ p->default_arp_entry_present = 0;
+
+ /* Free response */
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
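+/*
+ * Parse a colon-separated list of MPLS labels (e.g. "100:200:300");
+ * the string "<void>" denotes an empty label list.
+ */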
+static int
+parse_labels(char *string, uint32_t *labels, uint32_t *n_labels)
+{
+ uint32_t n_max_labels = *n_labels, count = 0;
+
+ /* Check for void list of labels */
+ if (strcmp(string, "<void>") == 0) {
+ *n_labels = 0;
+ return 0;
+ }
+
+ /* At least one label should be present */
+ for ( ; (*string != '\0'); ) {
+ char *next;
+ int value;
+
+ if (count >= n_max_labels)
+ return -1;
+
+ if (count > 0) {
+ if (string[0] != ':')
+ return -1;
+
+ string++;
+ }
+
+ value = strtol(string, &next, 10);
+ if (next == string)
+ return -1;
+ string = next;
+
+ labels[count++] = (uint32_t) value;
+ }
+
+ *n_labels = count;
+ return 0;
+}
+
+/*
+ * route add (mpls = no, qinq = no, arp = no)
+ */
+
+struct cmd_route_add1_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ struct ether_addr macaddr;
+};
+
+static void
+cmd_route_add1_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add1_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ int status;
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = 0;
+ route_data.port_id = params->port;
+ route_data.ethernet.macaddr = params->macaddr;
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add1_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add1_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add1_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add1_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add1_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add1_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add1_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add1_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add1_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add1_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add1_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add1_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_etheraddr_t cmd_route_add1_macaddr =
+ TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add1_result, macaddr);
+
+static cmdline_parse_inst_t cmd_route_add1 = {
+ .f = cmd_route_add1_parsed,
+ .data = NULL,
+ .help_str = "Route add (mpls = no, qinq = no, arp = no)",
+ .tokens = {
+ (void *)&cmd_route_add1_p_string,
+ (void *)&cmd_route_add1_p,
+ (void *)&cmd_route_add1_route_string,
+ (void *)&cmd_route_add1_add_string,
+ (void *)&cmd_route_add1_ip,
+ (void *)&cmd_route_add1_depth,
+ (void *)&cmd_route_add1_port_string,
+ (void *)&cmd_route_add1_port,
+ (void *)&cmd_route_add1_ether_string,
+ (void *)&cmd_route_add1_macaddr,
+ NULL,
+ },
+};
+
+/*
+ * route add (mpls = no, qinq = no, arp = yes)
+ */
+
+struct cmd_route_add2_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ cmdline_ipaddr_t nh_ip;
+};
+
+static void
+cmd_route_add2_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add2_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ int status;
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = PIPELINE_ROUTING_ROUTE_ARP;
+ route_data.port_id = params->port;
+ route_data.ethernet.ip =
+ rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr);
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add2_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add2_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add2_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add2_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add2_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add2_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add2_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add2_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add2_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add2_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add2_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add2_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add2_nh_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add2_result, nh_ip);
+
+static cmdline_parse_inst_t cmd_route_add2 = {
+ .f = cmd_route_add2_parsed,
+ .data = NULL,
+ .help_str = "Route add (mpls = no, qinq = no, arp = yes)",
+ .tokens = {
+ (void *)&cmd_route_add2_p_string,
+ (void *)&cmd_route_add2_p,
+ (void *)&cmd_route_add2_route_string,
+ (void *)&cmd_route_add2_add_string,
+ (void *)&cmd_route_add2_ip,
+ (void *)&cmd_route_add2_depth,
+ (void *)&cmd_route_add2_port_string,
+ (void *)&cmd_route_add2_port,
+ (void *)&cmd_route_add2_ether_string,
+ (void *)&cmd_route_add2_nh_ip,
+ NULL,
+ },
+};
+
+/*
+ * route add (mpls = no, qinq = yes, arp = no)
+ */
+
+struct cmd_route_add3_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ struct ether_addr macaddr;
+ cmdline_fixed_string_t qinq_string;
+ uint32_t svlan;
+ uint32_t cvlan;
+};
+
+static void
+cmd_route_add3_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add3_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ int status;
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = PIPELINE_ROUTING_ROUTE_QINQ;
+ route_data.port_id = params->port;
+ route_data.ethernet.macaddr = params->macaddr;
+ route_data.l2.qinq.svlan = params->svlan;
+ route_data.l2.qinq.cvlan = params->cvlan;
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add3_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add3_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add3_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add3_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add3_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add3_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add3_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add3_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add3_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add3_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_etheraddr_t cmd_route_add3_macaddr =
+ TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add3_result, macaddr);
+
+static cmdline_parse_token_string_t cmd_route_add3_qinq_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add3_result, qinq_string,
+ "qinq");
+
+static cmdline_parse_token_num_t cmd_route_add3_svlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, svlan, UINT32);
+
+static cmdline_parse_token_num_t cmd_route_add3_cvlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add3_result, cvlan, UINT32);
+
+static cmdline_parse_inst_t cmd_route_add3 = {
+ .f = cmd_route_add3_parsed,
+ .data = NULL,
+ .help_str = "Route add (qinq = yes, arp = no)",
+ .tokens = {
+ (void *)&cmd_route_add3_p_string,
+ (void *)&cmd_route_add3_p,
+ (void *)&cmd_route_add3_route_string,
+ (void *)&cmd_route_add3_add_string,
+ (void *)&cmd_route_add3_ip,
+ (void *)&cmd_route_add3_depth,
+ (void *)&cmd_route_add3_port_string,
+ (void *)&cmd_route_add3_port,
+ (void *)&cmd_route_add3_ether_string,
+ (void *)&cmd_route_add3_macaddr,
+ (void *)&cmd_route_add3_qinq_string,
+ (void *)&cmd_route_add3_svlan,
+ (void *)&cmd_route_add3_cvlan,
+ NULL,
+ },
+};
+
+/*
+ * route add (mpls = no, qinq = yes, arp = yes)
+ */
+
+struct cmd_route_add4_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ cmdline_ipaddr_t nh_ip;
+ cmdline_fixed_string_t qinq_string;
+ uint32_t svlan;
+ uint32_t cvlan;
+};
+
+static void
+cmd_route_add4_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add4_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ int status;
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = PIPELINE_ROUTING_ROUTE_QINQ |
+ PIPELINE_ROUTING_ROUTE_ARP;
+ route_data.port_id = params->port;
+ route_data.ethernet.ip =
+ rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr);
+ route_data.l2.qinq.svlan = params->svlan;
+ route_data.l2.qinq.cvlan = params->cvlan;
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add4_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add4_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add4_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add4_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add4_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add4_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add4_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add4_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add4_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add4_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add4_nh_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add4_result, nh_ip);
+
+static cmdline_parse_token_string_t cmd_route_add4_qinq_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add4_result, qinq_string,
+ "qinq");
+
+static cmdline_parse_token_num_t cmd_route_add4_svlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, svlan, UINT32);
+
+static cmdline_parse_token_num_t cmd_route_add4_cvlan =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add4_result, cvlan, UINT32);
+
+static cmdline_parse_inst_t cmd_route_add4 = {
+ .f = cmd_route_add4_parsed,
+ .data = NULL,
+ .help_str = "Route add (qinq = yes, arp = yes)",
+ .tokens = {
+ (void *)&cmd_route_add4_p_string,
+ (void *)&cmd_route_add4_p,
+ (void *)&cmd_route_add4_route_string,
+ (void *)&cmd_route_add4_add_string,
+ (void *)&cmd_route_add4_ip,
+ (void *)&cmd_route_add4_depth,
+ (void *)&cmd_route_add4_port_string,
+ (void *)&cmd_route_add4_port,
+ (void *)&cmd_route_add4_ether_string,
+ (void *)&cmd_route_add4_nh_ip,
+ (void *)&cmd_route_add4_qinq_string,
+ (void *)&cmd_route_add4_svlan,
+ (void *)&cmd_route_add4_cvlan,
+ NULL,
+ },
+};
+
+/*
+ * route add (mpls = yes, qinq = no, arp = no)
+ */
+
+struct cmd_route_add5_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ struct ether_addr macaddr;
+ cmdline_fixed_string_t mpls_string;
+ cmdline_fixed_string_t mpls_labels;
+};
+
+static void
+cmd_route_add5_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add5_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ uint32_t mpls_labels[PIPELINE_ROUTING_MPLS_LABELS_MAX];
+ uint32_t n_labels = RTE_DIM(mpls_labels);
+ uint32_t i;
+ int status;
+
+ /* Parse MPLS labels */
+ status = parse_labels(params->mpls_labels, mpls_labels, &n_labels);
+ if (status) {
+ printf("MPLS labels parse error\n");
+ return;
+ }
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = PIPELINE_ROUTING_ROUTE_MPLS;
+ route_data.port_id = params->port;
+ route_data.ethernet.macaddr = params->macaddr;
+ for (i = 0; i < n_labels; i++)
+ route_data.l2.mpls.labels[i] = mpls_labels[i];
+ route_data.l2.mpls.n_labels = n_labels;
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add5_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add5_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add5_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add5_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add5_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add5_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add5_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add5_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add5_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add5_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add5_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_etheraddr_t cmd_route_add5_macaddr =
+ TOKEN_ETHERADDR_INITIALIZER(struct cmd_route_add5_result, macaddr);
+
+static cmdline_parse_token_string_t cmd_route_add5_mpls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, mpls_string,
+ "mpls");
+
+static cmdline_parse_token_string_t cmd_route_add5_mpls_labels =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add5_result, mpls_labels,
+ NULL);
+
+static cmdline_parse_inst_t cmd_route_add5 = {
+ .f = cmd_route_add5_parsed,
+ .data = NULL,
+ .help_str = "Route add (mpls = yes, arp = no)",
+ .tokens = {
+ (void *)&cmd_route_add5_p_string,
+ (void *)&cmd_route_add5_p,
+ (void *)&cmd_route_add5_route_string,
+ (void *)&cmd_route_add5_add_string,
+ (void *)&cmd_route_add5_ip,
+ (void *)&cmd_route_add5_depth,
+ (void *)&cmd_route_add5_port_string,
+ (void *)&cmd_route_add5_port,
+ (void *)&cmd_route_add5_ether_string,
+ (void *)&cmd_route_add5_macaddr,
+ (void *)&cmd_route_add5_mpls_string,
+ (void *)&cmd_route_add5_mpls_labels,
+ NULL,
+ },
+};
+
+/*
+ * route add (mpls = yes, qinq = no, arp = yes)
+ */
+
+struct cmd_route_add6_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+ cmdline_fixed_string_t port_string;
+ uint32_t port;
+ cmdline_fixed_string_t ether_string;
+ cmdline_ipaddr_t nh_ip;
+ cmdline_fixed_string_t mpls_string;
+ cmdline_fixed_string_t mpls_labels;
+};
+
+static void
+cmd_route_add6_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add6_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+ struct pipeline_routing_route_data route_data;
+ uint32_t mpls_labels[PIPELINE_ROUTING_MPLS_LABELS_MAX];
+ uint32_t n_labels = RTE_DIM(mpls_labels);
+ uint32_t i;
+ int status;
+
+ /* Parse MPLS labels */
+ status = parse_labels(params->mpls_labels, mpls_labels, &n_labels);
+ if (status) {
+ printf("MPLS labels parse error\n");
+ return;
+ }
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ route_data.flags = PIPELINE_ROUTING_ROUTE_MPLS |
+ PIPELINE_ROUTING_ROUTE_ARP;
+ route_data.port_id = params->port;
+ route_data.ethernet.ip =
+ rte_bswap32((uint32_t) params->nh_ip.addr.ipv4.s_addr);
+ for (i = 0; i < n_labels; i++)
+ route_data.l2.mpls.labels[i] = mpls_labels[i];
+ route_data.l2.mpls.n_labels = n_labels;
+
+ status = app_pipeline_routing_add_route(app,
+ params->p,
+ &key,
+ &route_data);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add6_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add6_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add6_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_add6_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, add_string,
+ "add");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add6_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add6_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_add6_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, depth, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add6_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, port_string,
+ "port");
+
+static cmdline_parse_token_num_t cmd_route_add6_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add6_result, port, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_add6_ether_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, ether_string,
+ "ether");
+
+static cmdline_parse_token_ipaddr_t cmd_route_add6_nh_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_add6_result, nh_ip);
+
+static cmdline_parse_token_string_t cmd_route_add6_mpls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, mpls_string,
+ "mpls");
+
+static cmdline_parse_token_string_t cmd_route_add6_mpls_labels =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add6_result, mpls_labels,
+ NULL);
+
+static cmdline_parse_inst_t cmd_route_add6 = {
+ .f = cmd_route_add6_parsed,
+ .data = NULL,
+ .help_str = "Route add (mpls = yes, arp = yes)",
+ .tokens = {
+ (void *)&cmd_route_add6_p_string,
+ (void *)&cmd_route_add6_p,
+ (void *)&cmd_route_add6_route_string,
+ (void *)&cmd_route_add6_add_string,
+ (void *)&cmd_route_add6_ip,
+ (void *)&cmd_route_add6_depth,
+ (void *)&cmd_route_add6_port_string,
+ (void *)&cmd_route_add6_port,
+ (void *)&cmd_route_add6_ether_string,
+ (void *)&cmd_route_add6_nh_ip,
+ (void *)&cmd_route_add6_mpls_string,
+ (void *)&cmd_route_add6_mpls_labels,
+ NULL,
+ },
+};
+
+/*
+ * route del
+ */
+
+struct cmd_route_del_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_ipaddr_t ip;
+ uint32_t depth;
+};
+
+static void
+cmd_route_del_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_del_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing_route_key key;
+
+ int status;
+
+ /* Create route */
+ key.type = PIPELINE_ROUTING_ROUTE_IPV4;
+ key.key.ipv4.ip = rte_bswap32((uint32_t) params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.depth = params->depth;
+
+ status = app_pipeline_routing_delete_route(app, params->p, &key);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_del_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_del_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_del_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_del_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, route_string,
+ "route");
+
+static cmdline_parse_token_string_t cmd_route_del_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_result, del_string,
+ "del");
+
+static cmdline_parse_token_ipaddr_t cmd_route_del_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_route_del_result, ip);
+
+static cmdline_parse_token_num_t cmd_route_del_depth =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_del_result, depth, UINT32);
+
+static cmdline_parse_inst_t cmd_route_del = {
+ .f = cmd_route_del_parsed,
+ .data = NULL,
+ .help_str = "Route delete",
+ .tokens = {
+ (void *)&cmd_route_del_p_string,
+ (void *)&cmd_route_del_p,
+ (void *)&cmd_route_del_route_string,
+ (void *)&cmd_route_del_del_string,
+ (void *)&cmd_route_del_ip,
+ (void *)&cmd_route_del_depth,
+ NULL,
+ },
+};
+
+/*
+ * route add default
+ */
+
+struct cmd_route_add_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t default_string;
+ uint32_t port;
+};
+
+static void
+cmd_route_add_default_parsed(
+ void *parsed_result,
+	__rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_add_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_routing_add_default_route(app, params->p,
+ params->port);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_add_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_add_default_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add_default_result, p, UINT32);
+
+cmdline_parse_token_string_t cmd_route_add_default_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result,
+ route_string, "route");
+
+cmdline_parse_token_string_t cmd_route_add_default_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result,
+ add_string, "add");
+
+cmdline_parse_token_string_t cmd_route_add_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_add_default_result,
+ default_string, "default");
+
+cmdline_parse_token_num_t cmd_route_add_default_port =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_add_default_result,
+ port, UINT32);
+
+cmdline_parse_inst_t cmd_route_add_default = {
+ .f = cmd_route_add_default_parsed,
+ .data = NULL,
+ .help_str = "Route default set",
+ .tokens = {
+ (void *)&cmd_route_add_default_p_string,
+ (void *)&cmd_route_add_default_p,
+ (void *)&cmd_route_add_default_route_string,
+ (void *)&cmd_route_add_default_add_string,
+ (void *)&cmd_route_add_default_default_string,
+ (void *)&cmd_route_add_default_port,
+ NULL,
+ },
+};
+
+/*
+ * route del default
+ */
+
+struct cmd_route_del_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t default_string;
+};
+
+static void
+cmd_route_del_default_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_del_default_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_routing_delete_default_route(app, params->p);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_del_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_route_del_default_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_del_default_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_del_default_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result,
+ route_string, "route");
+
+static cmdline_parse_token_string_t cmd_route_del_default_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result,
+ del_string, "del");
+
+static cmdline_parse_token_string_t cmd_route_del_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_del_default_result,
+ default_string, "default");
+
+static cmdline_parse_inst_t cmd_route_del_default = {
+ .f = cmd_route_del_default_parsed,
+ .data = NULL,
+ .help_str = "Route default clear",
+ .tokens = {
+ (void *)&cmd_route_del_default_p_string,
+ (void *)&cmd_route_del_default_p,
+ (void *)&cmd_route_del_default_route_string,
+ (void *)&cmd_route_del_default_del_string,
+ (void *)&cmd_route_del_default_default_string,
+ NULL,
+ },
+};
+
+/*
+ * route ls
+ */
+
+struct cmd_route_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t route_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_route_ls_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_route_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+
+ status = app_pipeline_routing_route_ls(app, params->p);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_route_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result, p_string, "p");
+
+static cmdline_parse_token_num_t cmd_route_ls_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_route_ls_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_route_ls_route_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result,
+ route_string, "route");
+
+static cmdline_parse_token_string_t cmd_route_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_route_ls_result, ls_string,
+ "ls");
+
+static cmdline_parse_inst_t cmd_route_ls = {
+ .f = cmd_route_ls_parsed,
+ .data = NULL,
+ .help_str = "Route list",
+ .tokens = {
+ (void *)&cmd_route_ls_p_string,
+ (void *)&cmd_route_ls_p,
+ (void *)&cmd_route_ls_route_string,
+ (void *)&cmd_route_ls_ls_string,
+ NULL,
+ },
+};
+
+/*
+ * arp add
+ */
+
+struct cmd_arp_add_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t arp_string;
+ cmdline_fixed_string_t add_string;
+ uint32_t port_id;
+ cmdline_ipaddr_t ip;
+ struct ether_addr macaddr;
+};
+
+static void
+cmd_arp_add_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_arp_add_result *params = parsed_result;
+ struct app_params *app = data;
+
+ struct pipeline_routing_arp_key key;
+ int status;
+
+ key.type = PIPELINE_ROUTING_ARP_IPV4;
+ key.key.ipv4.port_id = params->port_id;
+ key.key.ipv4.ip = rte_cpu_to_be_32(params->ip.addr.ipv4.s_addr);
+
+ status = app_pipeline_routing_add_arp_entry(app,
+ params->p,
+ &key,
+ &params->macaddr);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_arp_add_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_arp_add_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_add_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_arp_add_arp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, arp_string, "arp");
+
+static cmdline_parse_token_string_t cmd_arp_add_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_result, add_string, "add");
+
+static cmdline_parse_token_num_t cmd_arp_add_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_add_result, port_id, UINT32);
+
+static cmdline_parse_token_ipaddr_t cmd_arp_add_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_arp_add_result, ip);
+
+static cmdline_parse_token_etheraddr_t cmd_arp_add_macaddr =
+ TOKEN_ETHERADDR_INITIALIZER(struct cmd_arp_add_result, macaddr);
+
+static cmdline_parse_inst_t cmd_arp_add = {
+ .f = cmd_arp_add_parsed,
+ .data = NULL,
+ .help_str = "ARP add",
+ .tokens = {
+ (void *)&cmd_arp_add_p_string,
+ (void *)&cmd_arp_add_p,
+ (void *)&cmd_arp_add_arp_string,
+ (void *)&cmd_arp_add_add_string,
+ (void *)&cmd_arp_add_port_id,
+ (void *)&cmd_arp_add_ip,
+ (void *)&cmd_arp_add_macaddr,
+ NULL,
+ },
+};
+
+/*
+ * arp del
+ */
+
+struct cmd_arp_del_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t arp_string;
+ cmdline_fixed_string_t del_string;
+ uint32_t port_id;
+ cmdline_ipaddr_t ip;
+};
+
+static void
+cmd_arp_del_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_arp_del_result *params = parsed_result;
+ struct app_params *app = data;
+
+ struct pipeline_routing_arp_key key;
+ int status;
+
+ key.type = PIPELINE_ROUTING_ARP_IPV4;
+ key.key.ipv4.ip = rte_cpu_to_be_32(params->ip.addr.ipv4.s_addr);
+ key.key.ipv4.port_id = params->port_id;
+
+ status = app_pipeline_routing_delete_arp_entry(app, params->p, &key);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_arp_del_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_arp_del_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_del_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_arp_del_arp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, arp_string, "arp");
+
+static cmdline_parse_token_string_t cmd_arp_del_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_result, del_string, "del");
+
+static cmdline_parse_token_num_t cmd_arp_del_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_del_result, port_id, UINT32);
+
+static cmdline_parse_token_ipaddr_t cmd_arp_del_ip =
+ TOKEN_IPV4_INITIALIZER(struct cmd_arp_del_result, ip);
+
+static cmdline_parse_inst_t cmd_arp_del = {
+ .f = cmd_arp_del_parsed,
+ .data = NULL,
+ .help_str = "ARP delete",
+ .tokens = {
+ (void *)&cmd_arp_del_p_string,
+ (void *)&cmd_arp_del_p,
+ (void *)&cmd_arp_del_arp_string,
+ (void *)&cmd_arp_del_del_string,
+ (void *)&cmd_arp_del_port_id,
+ (void *)&cmd_arp_del_ip,
+ NULL,
+ },
+};
+
+/*
+ * arp add default
+ */
+
+struct cmd_arp_add_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t arp_string;
+ cmdline_fixed_string_t add_string;
+ cmdline_fixed_string_t default_string;
+ uint32_t port_id;
+};
+
+static void
+cmd_arp_add_default_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_arp_add_default_result *params = parsed_result;
+ struct app_params *app = data;
+
+ int status;
+
+ status = app_pipeline_routing_add_default_arp_entry(app,
+ params->p,
+ params->port_id);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_arp_add_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_arp_add_default_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_add_default_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_arp_add_default_arp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, arp_string,
+ "arp");
+
+static cmdline_parse_token_string_t cmd_arp_add_default_add_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result, add_string,
+ "add");
+
+static cmdline_parse_token_string_t cmd_arp_add_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_add_default_result,
+ default_string, "default");
+
+static cmdline_parse_token_num_t cmd_arp_add_default_port_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_add_default_result, port_id,
+ UINT32);
+
+static cmdline_parse_inst_t cmd_arp_add_default = {
+ .f = cmd_arp_add_default_parsed,
+ .data = NULL,
+ .help_str = "ARP add default",
+ .tokens = {
+ (void *)&cmd_arp_add_default_p_string,
+ (void *)&cmd_arp_add_default_p,
+ (void *)&cmd_arp_add_default_arp_string,
+ (void *)&cmd_arp_add_default_add_string,
+ (void *)&cmd_arp_add_default_default_string,
+ (void *)&cmd_arp_add_default_port_id,
+ NULL,
+ },
+};
+
+/*
+ * arp del default
+ */
+
+struct cmd_arp_del_default_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t arp_string;
+ cmdline_fixed_string_t del_string;
+ cmdline_fixed_string_t default_string;
+};
+
+static void
+cmd_arp_del_default_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_arp_del_default_result *params = parsed_result;
+ struct app_params *app = data;
+
+ int status;
+
+ status = app_pipeline_routing_delete_default_arp_entry(app, params->p);
+
+ if (status != 0) {
+ printf("Command failed\n");
+ return;
+ }
+}
+
+static cmdline_parse_token_string_t cmd_arp_del_default_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_arp_del_default_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_del_default_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_arp_del_default_arp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, arp_string,
+ "arp");
+
+static cmdline_parse_token_string_t cmd_arp_del_default_del_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result, del_string,
+ "del");
+
+static cmdline_parse_token_string_t cmd_arp_del_default_default_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_del_default_result,
+ default_string, "default");
+
+static cmdline_parse_inst_t cmd_arp_del_default = {
+ .f = cmd_arp_del_default_parsed,
+ .data = NULL,
+ .help_str = "ARP delete default",
+ .tokens = {
+ (void *)&cmd_arp_del_default_p_string,
+ (void *)&cmd_arp_del_default_p,
+ (void *)&cmd_arp_del_default_arp_string,
+ (void *)&cmd_arp_del_default_del_string,
+ (void *)&cmd_arp_del_default_default_string,
+ NULL,
+ },
+};
+
+/*
+ * arp ls
+ */
+
+struct cmd_arp_ls_result {
+ cmdline_fixed_string_t p_string;
+ uint32_t p;
+ cmdline_fixed_string_t arp_string;
+ cmdline_fixed_string_t ls_string;
+};
+
+static void
+cmd_arp_ls_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_arp_ls_result *params = parsed_result;
+ struct app_params *app = data;
+ struct pipeline_routing *p;
+
+ p = app_pipeline_data_fe(app, params->p, &pipeline_routing);
+ if (p == NULL)
+ return;
+
+ app_pipeline_routing_arp_ls(app, params->p);
+}
+
+static cmdline_parse_token_string_t cmd_arp_ls_p_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, p_string,
+ "p");
+
+static cmdline_parse_token_num_t cmd_arp_ls_p =
+ TOKEN_NUM_INITIALIZER(struct cmd_arp_ls_result, p, UINT32);
+
+static cmdline_parse_token_string_t cmd_arp_ls_arp_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, arp_string,
+ "arp");
+
+static cmdline_parse_token_string_t cmd_arp_ls_ls_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_arp_ls_result, ls_string,
+ "ls");
+
+static cmdline_parse_inst_t cmd_arp_ls = {
+ .f = cmd_arp_ls_parsed,
+ .data = NULL,
+ .help_str = "ARP list",
+ .tokens = {
+ (void *)&cmd_arp_ls_p_string,
+ (void *)&cmd_arp_ls_p,
+ (void *)&cmd_arp_ls_arp_string,
+ (void *)&cmd_arp_ls_ls_string,
+ NULL,
+ },
+};
+
+static cmdline_parse_ctx_t pipeline_cmds[] = {
+ (cmdline_parse_inst_t *)&cmd_route_add1,
+ (cmdline_parse_inst_t *)&cmd_route_add2,
+ (cmdline_parse_inst_t *)&cmd_route_add3,
+ (cmdline_parse_inst_t *)&cmd_route_add4,
+ (cmdline_parse_inst_t *)&cmd_route_add5,
+ (cmdline_parse_inst_t *)&cmd_route_add6,
+ (cmdline_parse_inst_t *)&cmd_route_del,
+ (cmdline_parse_inst_t *)&cmd_route_add_default,
+ (cmdline_parse_inst_t *)&cmd_route_del_default,
+ (cmdline_parse_inst_t *)&cmd_route_ls,
+ (cmdline_parse_inst_t *)&cmd_arp_add,
+ (cmdline_parse_inst_t *)&cmd_arp_del,
+ (cmdline_parse_inst_t *)&cmd_arp_add_default,
+ (cmdline_parse_inst_t *)&cmd_arp_del_default,
+ (cmdline_parse_inst_t *)&cmd_arp_ls,
+ NULL,
+};
+
+static struct pipeline_fe_ops pipeline_routing_fe_ops = {
+ .f_init = pipeline_routing_init,
+ .f_free = app_pipeline_routing_free,
+ .cmds = pipeline_cmds,
+};
+
+struct pipeline_type pipeline_routing = {
+ .name = "ROUTING",
+ .be_ops = &pipeline_routing_be_ops,
+ .fe_ops = &pipeline_routing_fe_ops,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_routing.h b/examples/ip_pipeline/pipeline/pipeline_routing.h
new file mode 100644
index 00000000..fa41642b
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_routing.h
@@ -0,0 +1,93 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_ROUTING_H__
+#define __INCLUDE_PIPELINE_ROUTING_H__
+
+#include "pipeline.h"
+#include "pipeline_routing_be.h"
+
+/*
+ * Route
+ */
+
+int
+app_pipeline_routing_add_route(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_route_key *key,
+ struct pipeline_routing_route_data *data);
+
+int
+app_pipeline_routing_delete_route(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_route_key *key);
+
+int
+app_pipeline_routing_add_default_route(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_pipeline_routing_delete_default_route(struct app_params *app,
+ uint32_t pipeline_id);
+
+/*
+ * ARP
+ */
+
+int
+app_pipeline_routing_add_arp_entry(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_arp_key *key,
+ struct ether_addr *macaddr);
+
+int
+app_pipeline_routing_delete_arp_entry(struct app_params *app,
+ uint32_t pipeline_id,
+ struct pipeline_routing_arp_key *key);
+
+int
+app_pipeline_routing_add_default_arp_entry(struct app_params *app,
+ uint32_t pipeline_id,
+ uint32_t port_id);
+
+int
+app_pipeline_routing_delete_default_arp_entry(struct app_params *app,
+ uint32_t pipeline_id);
+
+/*
+ * Pipeline type
+ */
+extern struct pipeline_type pipeline_routing;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline/pipeline_routing_be.c b/examples/ip_pipeline/pipeline/pipeline_routing_be.c
new file mode 100644
index 00000000..bc5bf7a5
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_routing_be.c
@@ -0,0 +1,1970 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+#include <rte_byteorder.h>
+#include <rte_table_lpm.h>
+#include <rte_table_hash.h>
+#include <rte_pipeline.h>
+
+#include "pipeline_routing_be.h"
+#include "pipeline_actions_common.h"
+#include "parser.h"
+#include "hash_func.h"
+
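+/*
+ * Build one 32-bit MPLS label stack entry in host byte order:
+ * label (20 bits), EXP/TC (3 bits), bottom-of-stack flag (1 bit), TTL (8 bits).
+ */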
+#define MPLS_LABEL(label, exp, s, ttl) \
+ (((((uint64_t) (label)) & 0xFFFFFLLU) << 12) | \
+ ((((uint64_t) (exp)) & 0x7LLU) << 9) | \
+ ((((uint64_t) (s)) & 0x1LLU) << 8) | \
+ (((uint64_t) (ttl)) & 0xFFLU))
+
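+/*
+ * Pack the QoS scheduler hierarchy (queue 2 bits, traffic class 2 bits,
+ * color 2 bits, subport 16 bits, pipe 32 bits) into the 64-bit value that
+ * is stored in the mbuf sched hash field.
+ */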
+#define RTE_SCHED_PORT_HIERARCHY(subport, pipe, \
+ traffic_class, queue, color) \
+ ((((uint64_t) (queue)) & 0x3) | \
+ ((((uint64_t) (traffic_class)) & 0x3) << 2) | \
+ ((((uint64_t) (color)) & 0x3) << 4) | \
+ ((((uint64_t) (subport)) & 0xFFFF) << 16) | \
+ ((((uint64_t) (pipe)) & 0xFFFFFFFF) << 32))
+
+
+#define MAC_SRC_DEFAULT 0x112233445566ULL
+
+#ifndef PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s
+#define PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s 256
+#endif
+
+struct pipeline_routing {
+ struct pipeline p;
+ struct pipeline_routing_params params;
+ pipeline_msg_req_handler custom_handlers[PIPELINE_ROUTING_MSG_REQS];
+} __rte_cache_aligned;
+
+/*
+ * Message handlers
+ */
+static void *
+pipeline_routing_msg_req_custom_handler(struct pipeline *p, void *msg);
+
+static pipeline_msg_req_handler handlers[] = {
+ [PIPELINE_MSG_REQ_PING] =
+ pipeline_msg_req_ping_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_IN] =
+ pipeline_msg_req_stats_port_in_handler,
+ [PIPELINE_MSG_REQ_STATS_PORT_OUT] =
+ pipeline_msg_req_stats_port_out_handler,
+ [PIPELINE_MSG_REQ_STATS_TABLE] =
+ pipeline_msg_req_stats_table_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_ENABLE] =
+ pipeline_msg_req_port_in_enable_handler,
+ [PIPELINE_MSG_REQ_PORT_IN_DISABLE] =
+ pipeline_msg_req_port_in_disable_handler,
+ [PIPELINE_MSG_REQ_CUSTOM] =
+ pipeline_routing_msg_req_custom_handler,
+};
+
+static void *
+pipeline_routing_msg_req_route_add_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_route_del_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_route_add_default_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_route_del_default_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_arp_add_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_arp_del_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_arp_add_default_handler(struct pipeline *p,
+ void *msg);
+
+static void *
+pipeline_routing_msg_req_arp_del_default_handler(struct pipeline *p,
+ void *msg);
+
+static pipeline_msg_req_handler custom_handlers[] = {
+ [PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD] =
+ pipeline_routing_msg_req_route_add_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL] =
+ pipeline_routing_msg_req_route_del_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD_DEFAULT] =
+ pipeline_routing_msg_req_route_add_default_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL_DEFAULT] =
+ pipeline_routing_msg_req_route_del_default_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ARP_ADD] =
+ pipeline_routing_msg_req_arp_add_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ARP_DEL] =
+ pipeline_routing_msg_req_arp_del_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ARP_ADD_DEFAULT] =
+ pipeline_routing_msg_req_arp_add_default_handler,
+ [PIPELINE_ROUTING_MSG_REQ_ARP_DEL_DEFAULT] =
+ pipeline_routing_msg_req_arp_del_default_handler,
+};
+
+/*
+ * Routing table
+ */
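+/*
+ * Each route entry carries the forwarding action plus a pre-computed L2
+ * encapsulation: up to four 8-byte slabs and the mbuf offsets at which
+ * they are written when the packet header is rebuilt on table hit.
+ */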
+struct routing_table_entry {
+ struct rte_pipeline_table_entry head;
+ uint32_t flags;
+ uint32_t port_id; /* Output port ID */
+ uint32_t ip; /* Next hop IP address (only valid for remote routes) */
+
+ /* ether_l2 */
+ uint16_t data_offset;
+ uint16_t ether_l2_length;
+ uint64_t slab[4];
+ uint16_t slab_offset[4];
+};
+
+struct layout {
+ uint16_t a;
+ uint32_t b;
+ uint16_t c;
+} __attribute__((__packed__));
+
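+/*
+ * Copy only bytes 2..7 of the 8-byte slab, i.e. the 6-byte destination
+ * MAC address, leaving the two bytes in front of it untouched.
+ */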
+#define MACADDR_DST_WRITE(slab_ptr, slab) \
+{ \
+ struct layout *dst = (struct layout *) (slab_ptr); \
+ struct layout *src = (struct layout *) &(slab); \
+ \
+ dst->b = src->b; \
+ dst->c = src->c; \
+}
+
+static inline __attribute__((always_inline)) void
+pkt_work_routing(
+ struct rte_mbuf *pkt,
+ struct rte_pipeline_table_entry *table_entry,
+ void *arg,
+ int arp,
+ int qinq,
+ int qinq_sched,
+ int mpls,
+ int mpls_color_mark)
+{
+ struct pipeline_routing *p_rt = arg;
+
+ struct routing_table_entry *entry =
+ (struct routing_table_entry *) table_entry;
+
+ struct ipv4_hdr *ip = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkt, p_rt->params.ip_hdr_offset);
+
+ enum rte_meter_color pkt_color = (enum rte_meter_color)
+ RTE_MBUF_METADATA_UINT32(pkt, p_rt->params.color_offset);
+
+ struct pipeline_routing_arp_key_ipv4 *arp_key =
+ (struct pipeline_routing_arp_key_ipv4 *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkt, p_rt->params.arp_key_offset);
+
+ uint64_t *slab0_ptr, *slab1_ptr, *slab2_ptr, *slab3_ptr, sched;
+ uint32_t ip_da, nh_ip, port_id;
+ uint16_t total_length, data_offset, ether_l2_length;
+
+ /* Read */
+ total_length = rte_bswap16(ip->total_length);
+ ip_da = ip->dst_addr;
+ data_offset = entry->data_offset;
+ ether_l2_length = entry->ether_l2_length;
+ slab0_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[0]);
+ slab1_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[1]);
+ slab2_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[2]);
+ slab3_ptr = RTE_MBUF_METADATA_UINT64_PTR(pkt, entry->slab_offset[3]);
+
+ if (arp) {
+ port_id = entry->port_id;
+ nh_ip = entry->ip;
+ if (entry->flags & PIPELINE_ROUTING_ROUTE_LOCAL)
+ nh_ip = ip_da;
+ }
+
+ /* Compute */
+ total_length += ether_l2_length;
+
+ if (qinq && qinq_sched) {
+ uint32_t dscp = ip->type_of_service >> 2;
+ uint32_t svlan, cvlan, tc, tc_q;
+
+ if (qinq_sched == 1) {
+ uint64_t slab_qinq = rte_bswap64(entry->slab[0]);
+
+ svlan = (slab_qinq >> 48) & 0xFFF;
+ cvlan = (slab_qinq >> 16) & 0xFFF;
+ tc = (dscp >> 2) & 0x3;
+ tc_q = dscp & 0x3;
+ } else {
+ uint32_t ip_src = rte_bswap32(ip->src_addr);
+
+ svlan = 0;
+ cvlan = (ip_src >> 16) & 0xFFF;
+ tc = (ip_src >> 2) & 0x3;
+ tc_q = ip_src & 0x3;
+ }
+ sched = RTE_SCHED_PORT_HIERARCHY(svlan,
+ cvlan,
+ tc,
+ tc_q,
+ e_RTE_METER_GREEN);
+ }
+
+ /* Write */
+ pkt->data_off = data_offset;
+ pkt->data_len = total_length;
+ pkt->pkt_len = total_length;
+
+ if ((qinq == 0) && (mpls == 0)) {
+ *slab0_ptr = entry->slab[0];
+
+ if (arp == 0)
+ MACADDR_DST_WRITE(slab1_ptr, entry->slab[1]);
+ }
+
+ if (qinq) {
+ *slab0_ptr = entry->slab[0];
+ *slab1_ptr = entry->slab[1];
+
+ if (arp == 0)
+ MACADDR_DST_WRITE(slab2_ptr, entry->slab[2]);
+
+ if (qinq_sched) {
+ pkt->hash.sched.lo = sched & 0xFFFFFFFF;
+ pkt->hash.sched.hi = sched >> 32;
+ }
+ }
+
+ if (mpls) {
+ if (mpls_color_mark) {
+ uint64_t mpls_exp = rte_bswap64(
+ (MPLS_LABEL(0, pkt_color, 0, 0) << 32) |
+ MPLS_LABEL(0, pkt_color, 0, 0));
+
+ *slab0_ptr = entry->slab[0] | mpls_exp;
+ *slab1_ptr = entry->slab[1] | mpls_exp;
+ *slab2_ptr = entry->slab[2];
+ } else {
+ *slab0_ptr = entry->slab[0];
+ *slab1_ptr = entry->slab[1];
+ *slab2_ptr = entry->slab[2];
+ }
+
+ if (arp == 0)
+ MACADDR_DST_WRITE(slab3_ptr, entry->slab[3]);
+ }
+
+ if (arp) {
+ arp_key->port_id = port_id;
+ arp_key->ip = nh_ip;
+ }
+}
+
+static inline __attribute__((always_inline)) void
+pkt4_work_routing(
+ struct rte_mbuf **pkts,
+ struct rte_pipeline_table_entry **table_entries,
+ void *arg,
+ int arp,
+ int qinq,
+ int qinq_sched,
+ int mpls,
+ int mpls_color_mark)
+{
+ struct pipeline_routing *p_rt = arg;
+
+ struct routing_table_entry *entry0 =
+ (struct routing_table_entry *) table_entries[0];
+ struct routing_table_entry *entry1 =
+ (struct routing_table_entry *) table_entries[1];
+ struct routing_table_entry *entry2 =
+ (struct routing_table_entry *) table_entries[2];
+ struct routing_table_entry *entry3 =
+ (struct routing_table_entry *) table_entries[3];
+
+ struct ipv4_hdr *ip0 = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[0],
+ p_rt->params.ip_hdr_offset);
+ struct ipv4_hdr *ip1 = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[1],
+ p_rt->params.ip_hdr_offset);
+ struct ipv4_hdr *ip2 = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[2],
+ p_rt->params.ip_hdr_offset);
+ struct ipv4_hdr *ip3 = (struct ipv4_hdr *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[3],
+ p_rt->params.ip_hdr_offset);
+
+ enum rte_meter_color pkt0_color = (enum rte_meter_color)
+ RTE_MBUF_METADATA_UINT32(pkts[0], p_rt->params.color_offset);
+ enum rte_meter_color pkt1_color = (enum rte_meter_color)
+ RTE_MBUF_METADATA_UINT32(pkts[1], p_rt->params.color_offset);
+ enum rte_meter_color pkt2_color = (enum rte_meter_color)
+ RTE_MBUF_METADATA_UINT32(pkts[2], p_rt->params.color_offset);
+ enum rte_meter_color pkt3_color = (enum rte_meter_color)
+ RTE_MBUF_METADATA_UINT32(pkts[3], p_rt->params.color_offset);
+
+ struct pipeline_routing_arp_key_ipv4 *arp_key0 =
+ (struct pipeline_routing_arp_key_ipv4 *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[0],
+ p_rt->params.arp_key_offset);
+ struct pipeline_routing_arp_key_ipv4 *arp_key1 =
+ (struct pipeline_routing_arp_key_ipv4 *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[1],
+ p_rt->params.arp_key_offset);
+ struct pipeline_routing_arp_key_ipv4 *arp_key2 =
+ (struct pipeline_routing_arp_key_ipv4 *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[2],
+ p_rt->params.arp_key_offset);
+ struct pipeline_routing_arp_key_ipv4 *arp_key3 =
+ (struct pipeline_routing_arp_key_ipv4 *)
+ RTE_MBUF_METADATA_UINT8_PTR(pkts[3],
+ p_rt->params.arp_key_offset);
+
+ uint64_t *slab0_ptr0, *slab1_ptr0, *slab2_ptr0, *slab3_ptr0;
+ uint64_t *slab0_ptr1, *slab1_ptr1, *slab2_ptr1, *slab3_ptr1;
+ uint64_t *slab0_ptr2, *slab1_ptr2, *slab2_ptr2, *slab3_ptr2;
+ uint64_t *slab0_ptr3, *slab1_ptr3, *slab2_ptr3, *slab3_ptr3;
+ uint64_t sched0, sched1, sched2, sched3;
+
+ uint32_t ip_da0, nh_ip0, port_id0;
+ uint32_t ip_da1, nh_ip1, port_id1;
+ uint32_t ip_da2, nh_ip2, port_id2;
+ uint32_t ip_da3, nh_ip3, port_id3;
+
+ uint16_t total_length0, data_offset0, ether_l2_length0;
+ uint16_t total_length1, data_offset1, ether_l2_length1;
+ uint16_t total_length2, data_offset2, ether_l2_length2;
+ uint16_t total_length3, data_offset3, ether_l2_length3;
+
+ /* Read */
+ total_length0 = rte_bswap16(ip0->total_length);
+ total_length1 = rte_bswap16(ip1->total_length);
+ total_length2 = rte_bswap16(ip2->total_length);
+ total_length3 = rte_bswap16(ip3->total_length);
+
+ ip_da0 = ip0->dst_addr;
+ ip_da1 = ip1->dst_addr;
+ ip_da2 = ip2->dst_addr;
+ ip_da3 = ip3->dst_addr;
+
+ data_offset0 = entry0->data_offset;
+ data_offset1 = entry1->data_offset;
+ data_offset2 = entry2->data_offset;
+ data_offset3 = entry3->data_offset;
+
+ ether_l2_length0 = entry0->ether_l2_length;
+ ether_l2_length1 = entry1->ether_l2_length;
+ ether_l2_length2 = entry2->ether_l2_length;
+ ether_l2_length3 = entry3->ether_l2_length;
+
+ slab0_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ entry0->slab_offset[0]);
+ slab1_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ entry0->slab_offset[1]);
+ slab2_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ entry0->slab_offset[2]);
+ slab3_ptr0 = RTE_MBUF_METADATA_UINT64_PTR(pkts[0],
+ entry0->slab_offset[3]);
+
+ slab0_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ entry1->slab_offset[0]);
+ slab1_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ entry1->slab_offset[1]);
+ slab2_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ entry1->slab_offset[2]);
+ slab3_ptr1 = RTE_MBUF_METADATA_UINT64_PTR(pkts[1],
+ entry1->slab_offset[3]);
+
+ slab0_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ entry2->slab_offset[0]);
+ slab1_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ entry2->slab_offset[1]);
+ slab2_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ entry2->slab_offset[2]);
+ slab3_ptr2 = RTE_MBUF_METADATA_UINT64_PTR(pkts[2],
+ entry2->slab_offset[3]);
+
+ slab0_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ entry3->slab_offset[0]);
+ slab1_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ entry3->slab_offset[1]);
+ slab2_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ entry3->slab_offset[2]);
+ slab3_ptr3 = RTE_MBUF_METADATA_UINT64_PTR(pkts[3],
+ entry3->slab_offset[3]);
+
+ if (arp) {
+ port_id0 = entry0->port_id;
+ nh_ip0 = entry0->ip;
+ if (entry0->flags & PIPELINE_ROUTING_ROUTE_LOCAL)
+ nh_ip0 = ip_da0;
+
+ port_id1 = entry1->port_id;
+ nh_ip1 = entry1->ip;
+ if (entry1->flags & PIPELINE_ROUTING_ROUTE_LOCAL)
+ nh_ip1 = ip_da1;
+
+ port_id2 = entry2->port_id;
+ nh_ip2 = entry2->ip;
+ if (entry2->flags & PIPELINE_ROUTING_ROUTE_LOCAL)
+ nh_ip2 = ip_da2;
+
+ port_id3 = entry3->port_id;
+ nh_ip3 = entry3->ip;
+ if (entry3->flags & PIPELINE_ROUTING_ROUTE_LOCAL)
+ nh_ip3 = ip_da3;
+ }
+
+ /* Compute */
+ total_length0 += ether_l2_length0;
+ total_length1 += ether_l2_length1;
+ total_length2 += ether_l2_length2;
+ total_length3 += ether_l2_length3;
+
+ if (qinq && qinq_sched) {
+ uint32_t dscp0 = ip0->type_of_service >> 2;
+ uint32_t dscp1 = ip1->type_of_service >> 2;
+ uint32_t dscp2 = ip2->type_of_service >> 2;
+ uint32_t dscp3 = ip3->type_of_service >> 2;
+ uint32_t svlan0, cvlan0, tc0, tc_q0;
+ uint32_t svlan1, cvlan1, tc1, tc_q1;
+ uint32_t svlan2, cvlan2, tc2, tc_q2;
+ uint32_t svlan3, cvlan3, tc3, tc_q3;
+
+ if (qinq_sched == 1) {
+ uint64_t slab_qinq0 = rte_bswap64(entry0->slab[0]);
+ uint64_t slab_qinq1 = rte_bswap64(entry1->slab[0]);
+ uint64_t slab_qinq2 = rte_bswap64(entry2->slab[0]);
+ uint64_t slab_qinq3 = rte_bswap64(entry3->slab[0]);
+
+ svlan0 = (slab_qinq0 >> 48) & 0xFFF;
+ svlan1 = (slab_qinq1 >> 48) & 0xFFF;
+ svlan2 = (slab_qinq2 >> 48) & 0xFFF;
+ svlan3 = (slab_qinq3 >> 48) & 0xFFF;
+
+ cvlan0 = (slab_qinq0 >> 16) & 0xFFF;
+ cvlan1 = (slab_qinq1 >> 16) & 0xFFF;
+ cvlan2 = (slab_qinq2 >> 16) & 0xFFF;
+ cvlan3 = (slab_qinq3 >> 16) & 0xFFF;
+
+ tc0 = (dscp0 >> 2) & 0x3;
+ tc1 = (dscp1 >> 2) & 0x3;
+ tc2 = (dscp2 >> 2) & 0x3;
+ tc3 = (dscp3 >> 2) & 0x3;
+
+ tc_q0 = dscp0 & 0x3;
+ tc_q1 = dscp1 & 0x3;
+ tc_q2 = dscp2 & 0x3;
+ tc_q3 = dscp3 & 0x3;
+ } else {
+ uint32_t ip_src0 = rte_bswap32(ip0->src_addr);
+ uint32_t ip_src1 = rte_bswap32(ip1->src_addr);
+ uint32_t ip_src2 = rte_bswap32(ip2->src_addr);
+ uint32_t ip_src3 = rte_bswap32(ip3->src_addr);
+
+ svlan0 = 0;
+ svlan1 = 0;
+ svlan2 = 0;
+ svlan3 = 0;
+
+ cvlan0 = (ip_src0 >> 16) & 0xFFF;
+ cvlan1 = (ip_src1 >> 16) & 0xFFF;
+ cvlan2 = (ip_src2 >> 16) & 0xFFF;
+ cvlan3 = (ip_src3 >> 16) & 0xFFF;
+
+ tc0 = (ip_src0 >> 2) & 0x3;
+ tc1 = (ip_src1 >> 2) & 0x3;
+ tc2 = (ip_src2 >> 2) & 0x3;
+ tc3 = (ip_src3 >> 2) & 0x3;
+
+ tc_q0 = ip_src0 & 0x3;
+ tc_q1 = ip_src1 & 0x3;
+ tc_q2 = ip_src2 & 0x3;
+ tc_q3 = ip_src3 & 0x3;
+ }
+
+ sched0 = RTE_SCHED_PORT_HIERARCHY(svlan0,
+ cvlan0,
+ tc0,
+ tc_q0,
+ e_RTE_METER_GREEN);
+ sched1 = RTE_SCHED_PORT_HIERARCHY(svlan1,
+ cvlan1,
+ tc1,
+ tc_q1,
+ e_RTE_METER_GREEN);
+ sched2 = RTE_SCHED_PORT_HIERARCHY(svlan2,
+ cvlan2,
+ tc2,
+ tc_q2,
+ e_RTE_METER_GREEN);
+ sched3 = RTE_SCHED_PORT_HIERARCHY(svlan3,
+ cvlan3,
+ tc3,
+ tc_q3,
+ e_RTE_METER_GREEN);
+
+ }
+
+ /* Write */
+ pkts[0]->data_off = data_offset0;
+ pkts[1]->data_off = data_offset1;
+ pkts[2]->data_off = data_offset2;
+ pkts[3]->data_off = data_offset3;
+
+ pkts[0]->data_len = total_length0;
+ pkts[1]->data_len = total_length1;
+ pkts[2]->data_len = total_length2;
+ pkts[3]->data_len = total_length3;
+
+ pkts[0]->pkt_len = total_length0;
+ pkts[1]->pkt_len = total_length1;
+ pkts[2]->pkt_len = total_length2;
+ pkts[3]->pkt_len = total_length3;
+
+ if ((qinq == 0) && (mpls == 0)) {
+ *slab0_ptr0 = entry0->slab[0];
+ *slab0_ptr1 = entry1->slab[0];
+ *slab0_ptr2 = entry2->slab[0];
+ *slab0_ptr3 = entry3->slab[0];
+
+ if (arp == 0) {
+ MACADDR_DST_WRITE(slab1_ptr0, entry0->slab[1]);
+ MACADDR_DST_WRITE(slab1_ptr1, entry1->slab[1]);
+ MACADDR_DST_WRITE(slab1_ptr2, entry2->slab[1]);
+ MACADDR_DST_WRITE(slab1_ptr3, entry3->slab[1]);
+ }
+ }
+
+ if (qinq) {
+ *slab0_ptr0 = entry0->slab[0];
+ *slab0_ptr1 = entry1->slab[0];
+ *slab0_ptr2 = entry2->slab[0];
+ *slab0_ptr3 = entry3->slab[0];
+
+ *slab1_ptr0 = entry0->slab[1];
+ *slab1_ptr1 = entry1->slab[1];
+ *slab1_ptr2 = entry2->slab[1];
+ *slab1_ptr3 = entry3->slab[1];
+
+ if (arp == 0) {
+ MACADDR_DST_WRITE(slab2_ptr0, entry0->slab[2]);
+ MACADDR_DST_WRITE(slab2_ptr1, entry1->slab[2]);
+ MACADDR_DST_WRITE(slab2_ptr2, entry2->slab[2]);
+ MACADDR_DST_WRITE(slab2_ptr3, entry3->slab[2]);
+ }
+
+ if (qinq_sched) {
+ pkts[0]->hash.sched.lo = sched0 & 0xFFFFFFFF;
+ pkts[0]->hash.sched.hi = sched0 >> 32;
+ pkts[1]->hash.sched.lo = sched1 & 0xFFFFFFFF;
+ pkts[1]->hash.sched.hi = sched1 >> 32;
+ pkts[2]->hash.sched.lo = sched2 & 0xFFFFFFFF;
+ pkts[2]->hash.sched.hi = sched2 >> 32;
+ pkts[3]->hash.sched.lo = sched3 & 0xFFFFFFFF;
+ pkts[3]->hash.sched.hi = sched3 >> 32;
+ }
+ }
+
+ if (mpls) {
+ if (mpls_color_mark) {
+ uint64_t mpls_exp0 = rte_bswap64(
+ (MPLS_LABEL(0, pkt0_color, 0, 0) << 32) |
+ MPLS_LABEL(0, pkt0_color, 0, 0));
+ uint64_t mpls_exp1 = rte_bswap64(
+ (MPLS_LABEL(0, pkt1_color, 0, 0) << 32) |
+ MPLS_LABEL(0, pkt1_color, 0, 0));
+ uint64_t mpls_exp2 = rte_bswap64(
+ (MPLS_LABEL(0, pkt2_color, 0, 0) << 32) |
+ MPLS_LABEL(0, pkt2_color, 0, 0));
+ uint64_t mpls_exp3 = rte_bswap64(
+ (MPLS_LABEL(0, pkt3_color, 0, 0) << 32) |
+ MPLS_LABEL(0, pkt3_color, 0, 0));
+
+ *slab0_ptr0 = entry0->slab[0] | mpls_exp0;
+ *slab0_ptr1 = entry1->slab[0] | mpls_exp1;
+ *slab0_ptr2 = entry2->slab[0] | mpls_exp2;
+ *slab0_ptr3 = entry3->slab[0] | mpls_exp3;
+
+ *slab1_ptr0 = entry0->slab[1] | mpls_exp0;
+ *slab1_ptr1 = entry1->slab[1] | mpls_exp1;
+ *slab1_ptr2 = entry2->slab[1] | mpls_exp2;
+ *slab1_ptr3 = entry3->slab[1] | mpls_exp3;
+
+ *slab2_ptr0 = entry0->slab[2];
+ *slab2_ptr1 = entry1->slab[2];
+ *slab2_ptr2 = entry2->slab[2];
+ *slab2_ptr3 = entry3->slab[2];
+ } else {
+ *slab0_ptr0 = entry0->slab[0];
+ *slab0_ptr1 = entry1->slab[0];
+ *slab0_ptr2 = entry2->slab[0];
+ *slab0_ptr3 = entry3->slab[0];
+
+ *slab1_ptr0 = entry0->slab[1];
+ *slab1_ptr1 = entry1->slab[1];
+ *slab1_ptr2 = entry2->slab[1];
+ *slab1_ptr3 = entry3->slab[1];
+
+ *slab2_ptr0 = entry0->slab[2];
+ *slab2_ptr1 = entry1->slab[2];
+ *slab2_ptr2 = entry2->slab[2];
+ *slab2_ptr3 = entry3->slab[2];
+ }
+
+ if (arp == 0) {
+ MACADDR_DST_WRITE(slab3_ptr0, entry0->slab[3]);
+ MACADDR_DST_WRITE(slab3_ptr1, entry1->slab[3]);
+ MACADDR_DST_WRITE(slab3_ptr2, entry2->slab[3]);
+ MACADDR_DST_WRITE(slab3_ptr3, entry3->slab[3]);
+ }
+ }
+
+ if (arp) {
+ arp_key0->port_id = port_id0;
+ arp_key1->port_id = port_id1;
+ arp_key2->port_id = port_id2;
+ arp_key3->port_id = port_id3;
+
+ arp_key0->ip = nh_ip0;
+ arp_key1->ip = nh_ip1;
+ arp_key2->ip = nh_ip2;
+ arp_key3->ip = nh_ip3;
+ }
+}
+
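+/*
+ * Expand pkt_work_routing()/pkt4_work_routing() into specialized table
+ * action handlers, one per encapsulation/ARP/sched/color combination, so
+ * that the constant flags let the compiler drop the unused branches.
+ */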
+#define PKT_WORK_ROUTING_ETHERNET(arp) \
+static inline void \
+pkt_work_routing_ether_arp##arp( \
+ struct rte_mbuf *pkt, \
+ struct rte_pipeline_table_entry *table_entry, \
+ void *arg) \
+{ \
+ pkt_work_routing(pkt, table_entry, arg, arp, 0, 0, 0, 0);\
+}
+
+#define PKT4_WORK_ROUTING_ETHERNET(arp) \
+static inline void \
+pkt4_work_routing_ether_arp##arp( \
+ struct rte_mbuf **pkts, \
+ struct rte_pipeline_table_entry **table_entries, \
+ void *arg) \
+{ \
+ pkt4_work_routing(pkts, table_entries, arg, arp, 0, 0, 0, 0);\
+}
+
+#define routing_table_ah_hit_ether(arp) \
+PKT_WORK_ROUTING_ETHERNET(arp) \
+PKT4_WORK_ROUTING_ETHERNET(arp) \
+PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_arp##arp, \
+ pkt_work_routing_ether_arp##arp, \
+ pkt4_work_routing_ether_arp##arp)
+
+routing_table_ah_hit_ether(0)
+routing_table_ah_hit_ether(1)
+
+#define PKT_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \
+static inline void \
+pkt_work_routing_ether_qinq_sched##sched##_arp##arp( \
+ struct rte_mbuf *pkt, \
+ struct rte_pipeline_table_entry *table_entry, \
+ void *arg) \
+{ \
+ pkt_work_routing(pkt, table_entry, arg, arp, 1, sched, 0, 0);\
+}
+
+#define PKT4_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \
+static inline void \
+pkt4_work_routing_ether_qinq_sched##sched##_arp##arp( \
+ struct rte_mbuf **pkts, \
+ struct rte_pipeline_table_entry **table_entries, \
+ void *arg) \
+{ \
+ pkt4_work_routing(pkts, table_entries, arg, arp, 1, sched, 0, 0);\
+}
+
+#define routing_table_ah_hit_ether_qinq(sched, arp) \
+PKT_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \
+PKT4_WORK_ROUTING_ETHERNET_QINQ(sched, arp) \
+PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_qinq_sched##sched##_arp##arp,\
+ pkt_work_routing_ether_qinq_sched##sched##_arp##arp, \
+ pkt4_work_routing_ether_qinq_sched##sched##_arp##arp)
+
+routing_table_ah_hit_ether_qinq(0, 0)
+routing_table_ah_hit_ether_qinq(1, 0)
+routing_table_ah_hit_ether_qinq(2, 0)
+routing_table_ah_hit_ether_qinq(0, 1)
+routing_table_ah_hit_ether_qinq(1, 1)
+routing_table_ah_hit_ether_qinq(2, 1)
+
+#define PKT_WORK_ROUTING_ETHERNET_MPLS(color, arp) \
+static inline void \
+pkt_work_routing_ether_mpls_color##color##_arp##arp( \
+ struct rte_mbuf *pkt, \
+ struct rte_pipeline_table_entry *table_entry, \
+ void *arg) \
+{ \
+ pkt_work_routing(pkt, table_entry, arg, arp, 0, 0, 1, color);\
+}
+
+#define PKT4_WORK_ROUTING_ETHERNET_MPLS(color, arp) \
+static inline void \
+pkt4_work_routing_ether_mpls_color##color##_arp##arp( \
+ struct rte_mbuf **pkts, \
+ struct rte_pipeline_table_entry **table_entries, \
+ void *arg) \
+{ \
+ pkt4_work_routing(pkts, table_entries, arg, arp, 0, 0, 1, color);\
+}
+
+#define routing_table_ah_hit_ether_mpls(color, arp) \
+PKT_WORK_ROUTING_ETHERNET_MPLS(color, arp) \
+PKT4_WORK_ROUTING_ETHERNET_MPLS(color, arp) \
+PIPELINE_TABLE_AH_HIT(routing_table_ah_hit_ether_mpls_color##color##_arp##arp,\
+ pkt_work_routing_ether_mpls_color##color##_arp##arp, \
+ pkt4_work_routing_ether_mpls_color##color##_arp##arp)
+
+routing_table_ah_hit_ether_mpls(0, 0)
+routing_table_ah_hit_ether_mpls(1, 0)
+routing_table_ah_hit_ether_mpls(0, 1)
+routing_table_ah_hit_ether_mpls(1, 1)
+
+static rte_pipeline_table_action_handler_hit
+get_routing_table_ah_hit(struct pipeline_routing *p)
+{
+ if (p->params.dbg_ah_disable)
+ return NULL;
+
+ switch (p->params.encap) {
+ case PIPELINE_ROUTING_ENCAP_ETHERNET:
+ return (p->params.n_arp_entries) ?
+ routing_table_ah_hit_ether_arp1 :
+ routing_table_ah_hit_ether_arp0;
+
+ case PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ:
+ if (p->params.n_arp_entries)
+ switch (p->params.qinq_sched) {
+ case 0:
+ return routing_table_ah_hit_ether_qinq_sched0_arp1;
+ case 1:
+ return routing_table_ah_hit_ether_qinq_sched1_arp1;
+ case 2:
+ return routing_table_ah_hit_ether_qinq_sched2_arp1;
+ default:
+ return NULL;
+ }
+ else
+ switch (p->params.qinq_sched) {
+ case 0:
+ return routing_table_ah_hit_ether_qinq_sched0_arp0;
+ case 1:
+ return routing_table_ah_hit_ether_qinq_sched1_arp0;
+ case 2:
+ return routing_table_ah_hit_ether_qinq_sched2_arp0;
+ default:
+ return NULL;
+ }
+
+ case PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS:
+		if (p->params.n_arp_entries) {
+			if (p->params.mpls_color_mark)
+				return routing_table_ah_hit_ether_mpls_color1_arp1;
+			else
+				return routing_table_ah_hit_ether_mpls_color0_arp1;
+		} else {
+			if (p->params.mpls_color_mark)
+				return routing_table_ah_hit_ether_mpls_color1_arp0;
+			else
+				return routing_table_ah_hit_ether_mpls_color0_arp0;
+		}
+
+ default:
+ return NULL;
+ }
+}
+
+/*
+ * ARP table
+ */
+struct arp_table_entry {
+ struct rte_pipeline_table_entry head;
+ uint64_t macaddr;
+};
+
+/**
+ * ARP table AH
+ */
+static inline void
+pkt_work_arp(
+ struct rte_mbuf *pkt,
+ struct rte_pipeline_table_entry *table_entry,
+ __rte_unused void *arg)
+{
+ struct arp_table_entry *entry = (struct arp_table_entry *) table_entry;
+
+ /* Read */
+ uint64_t macaddr_dst = entry->macaddr;
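+	/* Point the slab 2 bytes before the frame start so that
+	 * MACADDR_DST_WRITE() lands on the destination MAC at frame offset 0.
+	 */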
+ uint64_t *slab_ptr = (uint64_t *) ((char *) pkt->buf_addr +
+ (pkt->data_off - 2));
+
+ /* Compute */
+
+ /* Write */
+ MACADDR_DST_WRITE(slab_ptr, macaddr_dst);
+}
+
+static inline void
+pkt4_work_arp(
+ struct rte_mbuf **pkts,
+ struct rte_pipeline_table_entry **table_entries,
+ __rte_unused void *arg)
+{
+ struct arp_table_entry *entry0 =
+ (struct arp_table_entry *) table_entries[0];
+ struct arp_table_entry *entry1 =
+ (struct arp_table_entry *) table_entries[1];
+ struct arp_table_entry *entry2 =
+ (struct arp_table_entry *) table_entries[2];
+ struct arp_table_entry *entry3 =
+ (struct arp_table_entry *) table_entries[3];
+
+ /* Read */
+ uint64_t macaddr_dst0 = entry0->macaddr;
+ uint64_t macaddr_dst1 = entry1->macaddr;
+ uint64_t macaddr_dst2 = entry2->macaddr;
+ uint64_t macaddr_dst3 = entry3->macaddr;
+
+ uint64_t *slab_ptr0 = (uint64_t *) ((char *) pkts[0]->buf_addr +
+ (pkts[0]->data_off - 2));
+ uint64_t *slab_ptr1 = (uint64_t *) ((char *) pkts[1]->buf_addr +
+ (pkts[1]->data_off - 2));
+ uint64_t *slab_ptr2 = (uint64_t *) ((char *) pkts[2]->buf_addr +
+ (pkts[2]->data_off - 2));
+ uint64_t *slab_ptr3 = (uint64_t *) ((char *) pkts[3]->buf_addr +
+ (pkts[3]->data_off - 2));
+
+ /* Compute */
+
+ /* Write */
+ MACADDR_DST_WRITE(slab_ptr0, macaddr_dst0);
+ MACADDR_DST_WRITE(slab_ptr1, macaddr_dst1);
+ MACADDR_DST_WRITE(slab_ptr2, macaddr_dst2);
+ MACADDR_DST_WRITE(slab_ptr3, macaddr_dst3);
+}
+
+PIPELINE_TABLE_AH_HIT(arp_table_ah_hit,
+ pkt_work_arp,
+	pkt4_work_arp)
+
+static rte_pipeline_table_action_handler_hit
+get_arp_table_ah_hit(struct pipeline_routing *p)
+{
+ if (p->params.dbg_ah_disable)
+ return NULL;
+
+ return arp_table_ah_hit;
+}
+
+/*
+ * Argument parsing
+ */
+int
+pipeline_routing_parse_args(struct pipeline_routing_params *p,
+ struct pipeline_params *params)
+{
+ uint32_t n_routes_present = 0;
+ uint32_t encap_present = 0;
+ uint32_t qinq_sched_present = 0;
+ uint32_t mpls_color_mark_present = 0;
+ uint32_t n_arp_entries_present = 0;
+ uint32_t ip_hdr_offset_present = 0;
+ uint32_t arp_key_offset_present = 0;
+ uint32_t color_offset_present = 0;
+ uint32_t dbg_ah_disable_present = 0;
+ uint32_t i;
+
+ /* default values */
+ p->n_routes = PIPELINE_ROUTING_N_ROUTES_DEFAULT;
+ p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET;
+ p->qinq_sched = 0;
+ p->mpls_color_mark = 0;
+ p->n_arp_entries = 0;
+ p->dbg_ah_disable = 0;
+
+ for (i = 0; i < params->n_args; i++) {
+ char *arg_name = params->args_name[i];
+ char *arg_value = params->args_value[i];
+
+ /* n_routes */
+ if (strcmp(arg_name, "n_routes") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_routes_present == 0, params->name,
+ arg_name);
+ n_routes_present = 1;
+
+ status = parser_read_uint32(&p->n_routes,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL(((status != -EINVAL) &&
+ (p->n_routes != 0)), params->name,
+ arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* encap */
+ if (strcmp(arg_name, "encap") == 0) {
+ PIPELINE_PARSE_ERR_DUPLICATE(encap_present == 0,
+ params->name, arg_name);
+ encap_present = 1;
+
+ /* ethernet */
+ if (strcmp(arg_value, "ethernet") == 0) {
+ p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET;
+ continue;
+ }
+
+ /* ethernet_qinq */
+ if (strcmp(arg_value, "ethernet_qinq") == 0) {
+ p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ;
+ continue;
+ }
+
+ /* ethernet_mpls */
+ if (strcmp(arg_value, "ethernet_mpls") == 0) {
+ p->encap = PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS;
+ continue;
+ }
+
+ /* any other */
+ PIPELINE_PARSE_ERR_INV_VAL(0, params->name,
+ arg_name, arg_value);
+ }
+
+ /* qinq_sched */
+ if (strcmp(arg_name, "qinq_sched") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ qinq_sched_present == 0, params->name,
+ arg_name);
+ qinq_sched_present = 1;
+
+ status = parser_read_arg_bool(arg_value);
+ if (status == -EINVAL) {
+ if (strcmp(arg_value, "test") == 0) {
+ p->qinq_sched = 2;
+ continue;
+ }
+ } else {
+ p->qinq_sched = status;
+ continue;
+ }
+
+ PIPELINE_PARSE_ERR_INV_VAL(0, params->name,
+ arg_name, arg_value);
+ }
+
+ /* mpls_color_mark */
+ if (strcmp(arg_name, "mpls_color_mark") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ mpls_color_mark_present == 0,
+ params->name, arg_name);
+ mpls_color_mark_present = 1;
+
+
+ status = parser_read_arg_bool(arg_value);
+ if (status >= 0) {
+ p->mpls_color_mark = status;
+ continue;
+ }
+
+ PIPELINE_PARSE_ERR_INV_VAL(0, params->name,
+ arg_name, arg_value);
+ }
+
+ /* n_arp_entries */
+ if (strcmp(arg_name, "n_arp_entries") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ n_arp_entries_present == 0, params->name,
+ arg_name);
+ n_arp_entries_present = 1;
+
+ status = parser_read_uint32(&p->n_arp_entries,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* ip_hdr_offset */
+ if (strcmp(arg_name, "ip_hdr_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ ip_hdr_offset_present == 0, params->name,
+ arg_name);
+ ip_hdr_offset_present = 1;
+
+ status = parser_read_uint32(&p->ip_hdr_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* arp_key_offset */
+ if (strcmp(arg_name, "arp_key_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ arp_key_offset_present == 0, params->name,
+ arg_name);
+ arp_key_offset_present = 1;
+
+ status = parser_read_uint32(&p->arp_key_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* color_offset */
+ if (strcmp(arg_name, "color_offset") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ color_offset_present == 0, params->name,
+ arg_name);
+ color_offset_present = 1;
+
+ status = parser_read_uint32(&p->color_offset,
+ arg_value);
+ PIPELINE_PARSE_ERR_INV_VAL((status != -EINVAL),
+ params->name, arg_name, arg_value);
+ PIPELINE_PARSE_ERR_OUT_RNG((status != -ERANGE),
+ params->name, arg_name, arg_value);
+
+ continue;
+ }
+
+ /* debug */
+ if (strcmp(arg_name, "dbg_ah_disable") == 0) {
+ int status;
+
+ PIPELINE_PARSE_ERR_DUPLICATE(
+ dbg_ah_disable_present == 0, params->name,
+ arg_name);
+ dbg_ah_disable_present = 1;
+
+ status = parser_read_arg_bool(arg_value);
+ if (status >= 0) {
+ p->dbg_ah_disable = status;
+ continue;
+ }
+
+ PIPELINE_PARSE_ERR_INV_VAL(0, params->name,
+ arg_name, arg_value);
+
+ continue;
+ }
+
+ /* any other */
+ PIPELINE_PARSE_ERR_INV_ENT(0, params->name, arg_name);
+ }
+
+ /* Check that mandatory arguments are present */
+ PIPELINE_PARSE_ERR_MANDATORY(ip_hdr_offset_present, params->name,
+ "ip_hdr_offset");
+
+ /* Check relations between arguments */
+ switch (p->encap) {
+ case PIPELINE_ROUTING_ENCAP_ETHERNET:
+ PIPELINE_ARG_CHECK((!p->qinq_sched), "Parse error in "
+ "section \"%s\": encap = ethernet, therefore "
+ "qinq_sched = yes/test is not allowed",
+ params->name);
+ PIPELINE_ARG_CHECK((!p->mpls_color_mark), "Parse error "
+ "in section \"%s\": encap = ethernet, therefore "
+ "mpls_color_mark = yes is not allowed",
+ params->name);
+ PIPELINE_ARG_CHECK((!color_offset_present), "Parse error "
+ "in section \"%s\": encap = ethernet, therefore "
+ "color_offset is not allowed",
+ params->name);
+ break;
+
+ case PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ:
+ PIPELINE_ARG_CHECK((!p->mpls_color_mark), "Parse error "
+ "in section \"%s\": encap = ethernet_qinq, "
+ "therefore mpls_color_mark = yes is not allowed",
+ params->name);
+ PIPELINE_ARG_CHECK((!color_offset_present), "Parse error "
+ "in section \"%s\": encap = ethernet_qinq, "
+ "therefore color_offset is not allowed",
+ params->name);
+ break;
+
+ case PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS:
+ PIPELINE_ARG_CHECK((!p->qinq_sched), "Parse error in "
+ "section \"%s\": encap = ethernet_mpls, therefore "
+ "qinq_sched = yes/test is not allowed",
+ params->name);
+ break;
+ }
+
+ PIPELINE_ARG_CHECK((!(p->n_arp_entries &&
+ (!arp_key_offset_present))), "Parse error in section "
+ "\"%s\": n_arp_entries is set while "
+ "arp_key_offset is not set", params->name);
+
+ PIPELINE_ARG_CHECK((!((p->n_arp_entries == 0) &&
+ arp_key_offset_present)), "Parse error in section "
+ "\"%s\": arp_key_offset present while "
+ "n_arp_entries is not set", params->name);
+
+ return 0;
+}
+
+static void *
+pipeline_routing_init(struct pipeline_params *params,
+ __rte_unused void *arg)
+{
+ struct pipeline *p;
+ struct pipeline_routing *p_rt;
+ uint32_t size, i;
+
+ /* Check input arguments */
+ if ((params == NULL) ||
+ (params->n_ports_in == 0) ||
+ (params->n_ports_out == 0))
+ return NULL;
+
+ /* Memory allocation */
+ size = RTE_CACHE_LINE_ROUNDUP(sizeof(struct pipeline_routing));
+ p = rte_zmalloc(NULL, size, RTE_CACHE_LINE_SIZE);
+ p_rt = (struct pipeline_routing *) p;
+ if (p == NULL)
+ return NULL;
+
+ strcpy(p->name, params->name);
+ p->log_level = params->log_level;
+
+ PLOG(p, HIGH, "Routing");
+
+ /* Parse arguments */
+	if (pipeline_routing_parse_args(&p_rt->params, params)) {
+		rte_free(p);
+		return NULL;
+	}
+
+ /* Pipeline */
+ {
+ struct rte_pipeline_params pipeline_params = {
+ .name = params->name,
+ .socket_id = params->socket_id,
+ .offset_port_id = 0,
+ };
+
+ p->p = rte_pipeline_create(&pipeline_params);
+ if (p->p == NULL) {
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Input ports */
+ p->n_ports_in = params->n_ports_in;
+ for (i = 0; i < p->n_ports_in; i++) {
+ struct rte_pipeline_port_in_params port_params = {
+ .ops = pipeline_port_in_params_get_ops(
+ &params->port_in[i]),
+ .arg_create = pipeline_port_in_params_convert(
+ &params->port_in[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ .burst_size = params->port_in[i].burst_size,
+ };
+
+ int status = rte_pipeline_port_in_create(p->p,
+ &port_params,
+ &p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Output ports */
+ p->n_ports_out = params->n_ports_out;
+ for (i = 0; i < p->n_ports_out; i++) {
+ struct rte_pipeline_port_out_params port_params = {
+ .ops = pipeline_port_out_params_get_ops(
+ &params->port_out[i]),
+ .arg_create = pipeline_port_out_params_convert(
+ &params->port_out[i]),
+ .f_action = NULL,
+ .arg_ah = NULL,
+ };
+
+ int status = rte_pipeline_port_out_create(p->p,
+ &port_params,
+ &p->port_out_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Routing table */
+ p->n_tables = 1;
+ {
+ struct rte_table_lpm_params table_lpm_params = {
+ .name = p->name,
+ .n_rules = p_rt->params.n_routes,
+ .number_tbl8s = PIPELINE_ROUTING_LPM_TABLE_NUMBER_TABLE8s,
+ .flags = 0,
+ .entry_unique_size = sizeof(struct routing_table_entry),
+ .offset = p_rt->params.ip_hdr_offset +
+ __builtin_offsetof(struct ipv4_hdr, dst_addr),
+ };
+
+ struct rte_pipeline_table_params table_params = {
+ .ops = &rte_table_lpm_ops,
+ .arg_create = &table_lpm_params,
+ .f_action_hit = get_routing_table_ah_hit(p_rt),
+ .f_action_miss = NULL,
+ .arg_ah = p_rt,
+ .action_data_size =
+ sizeof(struct routing_table_entry) -
+ sizeof(struct rte_pipeline_table_entry),
+ };
+
+ int status;
+
+ status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* ARP table configuration */
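+	/* The 8-byte hash key is the {output port, next-hop IPv4} pair written
+	 * into the mbuf at arp_key_offset by the routing table action handler.
+	 */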
+ if (p_rt->params.n_arp_entries) {
+ struct rte_table_hash_key8_ext_params table_arp_params = {
+ .n_entries = p_rt->params.n_arp_entries,
+ .n_entries_ext = p_rt->params.n_arp_entries,
+ .f_hash = hash_default_key8,
+ .seed = 0,
+ .signature_offset = 0, /* Unused */
+ .key_offset = p_rt->params.arp_key_offset,
+ };
+
+ struct rte_pipeline_table_params table_params = {
+ .ops = &rte_table_hash_key8_ext_dosig_ops,
+ .arg_create = &table_arp_params,
+ .f_action_hit = get_arp_table_ah_hit(p_rt),
+ .f_action_miss = NULL,
+ .arg_ah = p_rt,
+ .action_data_size = sizeof(struct arp_table_entry) -
+ sizeof(struct rte_pipeline_table_entry),
+ };
+
+ int status;
+
+ status = rte_pipeline_table_create(p->p,
+ &table_params,
+ &p->table_id[1]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ p->n_tables++;
+ }
+
+ /* Connecting input ports to tables */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_connect_to_table(p->p,
+ p->port_in_id[i],
+ p->table_id[0]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Enable input ports */
+ for (i = 0; i < p->n_ports_in; i++) {
+ int status = rte_pipeline_port_in_enable(p->p,
+ p->port_in_id[i]);
+
+ if (status) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+ }
+
+ /* Check pipeline consistency */
+ if (rte_pipeline_check(p->p) < 0) {
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return NULL;
+ }
+
+ /* Message queues */
+ p->n_msgq = params->n_msgq;
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_in[i] = params->msgq_in[i];
+ for (i = 0; i < p->n_msgq; i++)
+ p->msgq_out[i] = params->msgq_out[i];
+
+ /* Message handlers */
+ memcpy(p->handlers, handlers, sizeof(p->handlers));
+ memcpy(p_rt->custom_handlers,
+ custom_handlers,
+ sizeof(p_rt->custom_handlers));
+
+ return p;
+}
+
+static int
+pipeline_routing_free(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if (p == NULL)
+ return -1;
+
+ /* Free resources */
+ rte_pipeline_free(p->p);
+ rte_free(p);
+ return 0;
+}
+
+static int
+pipeline_routing_track(void *pipeline,
+	uint32_t port_in,
+ uint32_t *port_out)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ /* Check input arguments */
+ if ((p == NULL) ||
+ (port_in >= p->n_ports_in) ||
+ (port_out == NULL))
+ return -1;
+
+ if (p->n_ports_in == 1) {
+ *port_out = 0;
+ return 0;
+ }
+
+ return -1;
+}
+
+static int
+pipeline_routing_timer(void *pipeline)
+{
+ struct pipeline *p = (struct pipeline *) pipeline;
+
+ pipeline_msg_req_handle(p);
+ rte_pipeline_flush(p->p);
+
+ return 0;
+}
+
+static void *
+pipeline_routing_msg_req_custom_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_routing *p_rt = (struct pipeline_routing *) p;
+ struct pipeline_custom_msg_req *req = msg;
+ pipeline_msg_req_handler f_handle;
+
+ f_handle = (req->subtype < PIPELINE_ROUTING_MSG_REQS) ?
+ p_rt->custom_handlers[req->subtype] :
+ pipeline_msg_req_invalid_handler;
+
+ if (f_handle == NULL)
+ f_handle = pipeline_msg_req_invalid_handler;
+
+ return f_handle(p, req);
+}
+
+static void *
+pipeline_routing_msg_req_route_add_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing *p_rt = (struct pipeline_routing *) p;
+ struct pipeline_routing_route_add_msg_req *req = msg;
+ struct pipeline_routing_route_add_msg_rsp *rsp = msg;
+
+ struct rte_table_lpm_key key = {
+ .ip = req->key.key.ipv4.ip,
+ .depth = req->key.key.ipv4.depth,
+ };
+
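+	/* Two candidate entries: entry_arp0 sends the packet straight to the
+	 * output port (ARP table disabled), entry_arp1 chains to the ARP table
+	 * so the destination MAC is resolved from {output port, next-hop IP}.
+	 */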
+ struct routing_table_entry entry_arp0 = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->data.port_id]},
+ },
+
+ .flags = req->data.flags,
+ .port_id = req->data.port_id,
+ .ip = 0,
+ .data_offset = 0,
+ .ether_l2_length = 0,
+ .slab = {0},
+ .slab_offset = {0},
+ };
+
+ struct routing_table_entry entry_arp1 = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_TABLE,
+ {.table_id = p->table_id[1]},
+ },
+
+ .flags = req->data.flags,
+ .port_id = req->data.port_id,
+ .ip = rte_bswap32(req->data.ethernet.ip),
+ .data_offset = 0,
+ .ether_l2_length = 0,
+ .slab = {0},
+ .slab_offset = {0},
+ };
+
+ struct rte_pipeline_table_entry *entry = (p_rt->params.n_arp_entries) ?
+ (struct rte_pipeline_table_entry *) &entry_arp1 :
+ (struct rte_pipeline_table_entry *) &entry_arp0;
+
+ if ((req->key.type != PIPELINE_ROUTING_ROUTE_IPV4) ||
+ ((p_rt->params.n_arp_entries == 0) &&
+ (req->data.flags & PIPELINE_ROUTING_ROUTE_ARP)) ||
+ (p_rt->params.n_arp_entries &&
+ ((req->data.flags & PIPELINE_ROUTING_ROUTE_ARP) == 0)) ||
+ ((p_rt->params.encap != PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) &&
+ (req->data.flags & PIPELINE_ROUTING_ROUTE_QINQ)) ||
+ ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) &&
+ ((req->data.flags & PIPELINE_ROUTING_ROUTE_QINQ) == 0)) ||
+ ((p_rt->params.encap != PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) &&
+ (req->data.flags & PIPELINE_ROUTING_ROUTE_MPLS)) ||
+ ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) &&
+ ((req->data.flags & PIPELINE_ROUTING_ROUTE_MPLS) == 0))) {
+ rsp->status = -1;
+ return rsp;
+ }
+
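+	/* Each block below pre-computes the new L2 header as 8-byte slabs and
+	 * the offsets at which they are written in front of the IP header. */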
+ /* Ether - ARP off */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET) &&
+ (p_rt->params.n_arp_entries == 0)) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t macaddr_dst;
+ uint64_t ethertype = ETHER_TYPE_IPv4;
+
+ macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr));
+ macaddr_dst = rte_bswap64(macaddr_dst << 16);
+
+ entry_arp0.slab[0] =
+ rte_bswap64((macaddr_src << 16) | ethertype);
+ entry_arp0.slab_offset[0] = p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64(macaddr_dst);
+ entry_arp0.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8;
+
+ entry_arp0.data_offset = entry_arp0.slab_offset[1] + 2
+ - sizeof(struct rte_mbuf);
+ entry_arp0.ether_l2_length = 14;
+ }
+
+ /* Ether - ARP on */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET) &&
+ p_rt->params.n_arp_entries) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t ethertype = ETHER_TYPE_IPv4;
+
+ entry_arp1.slab[0] = rte_bswap64((macaddr_src << 16) |
+ ethertype);
+ entry_arp1.slab_offset[0] = p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.data_offset = entry_arp1.slab_offset[0] - 6
+ - sizeof(struct rte_mbuf);
+ entry_arp1.ether_l2_length = 14;
+ }
+
+ /* Ether QinQ - ARP off */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) &&
+ (p_rt->params.n_arp_entries == 0)) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t macaddr_dst;
+ uint64_t ethertype_ipv4 = ETHER_TYPE_IPv4;
+ uint64_t ethertype_vlan = 0x8100;
+ uint64_t ethertype_qinq = 0x9100;
+ uint64_t svlan = req->data.l2.qinq.svlan;
+ uint64_t cvlan = req->data.l2.qinq.cvlan;
+
+ macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr));
+ macaddr_dst = rte_bswap64(macaddr_dst << 16);
+
+ entry_arp0.slab[0] = rte_bswap64((svlan << 48) |
+ (ethertype_vlan << 32) |
+ (cvlan << 16) |
+ ethertype_ipv4);
+ entry_arp0.slab_offset[0] = p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64((macaddr_src << 16) |
+ ethertype_qinq);
+ entry_arp0.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8;
+
+ entry_arp0.slab[2] = rte_bswap64(macaddr_dst);
+ entry_arp0.slab_offset[2] = p_rt->params.ip_hdr_offset - 3 * 8;
+
+ entry_arp0.data_offset = entry_arp0.slab_offset[2] + 2
+ - sizeof(struct rte_mbuf);
+ entry_arp0.ether_l2_length = 22;
+ }
+
+ /* Ether QinQ - ARP on */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ) &&
+ p_rt->params.n_arp_entries) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t ethertype_ipv4 = ETHER_TYPE_IPv4;
+ uint64_t ethertype_vlan = 0x8100;
+ uint64_t ethertype_qinq = 0x9100;
+ uint64_t svlan = req->data.l2.qinq.svlan;
+ uint64_t cvlan = req->data.l2.qinq.cvlan;
+
+ entry_arp1.slab[0] = rte_bswap64((svlan << 48) |
+ (ethertype_vlan << 32) |
+ (cvlan << 16) |
+ ethertype_ipv4);
+ entry_arp1.slab_offset[0] = p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.slab[1] = rte_bswap64((macaddr_src << 16) |
+ ethertype_qinq);
+ entry_arp1.slab_offset[1] = p_rt->params.ip_hdr_offset - 2 * 8;
+
+ entry_arp1.data_offset = entry_arp1.slab_offset[1] - 6
+ - sizeof(struct rte_mbuf);
+ entry_arp1.ether_l2_length = 22;
+ }
+
+ /* Ether MPLS - ARP off */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) &&
+ (p_rt->params.n_arp_entries == 0)) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t macaddr_dst;
+ uint64_t ethertype_mpls = 0x8847;
+
+ uint64_t label0 = req->data.l2.mpls.labels[0];
+ uint64_t label1 = req->data.l2.mpls.labels[1];
+ uint64_t label2 = req->data.l2.mpls.labels[2];
+ uint64_t label3 = req->data.l2.mpls.labels[3];
+ uint32_t n_labels = req->data.l2.mpls.n_labels;
+
+ macaddr_dst = *((uint64_t *)&(req->data.ethernet.macaddr));
+ macaddr_dst = rte_bswap64(macaddr_dst << 16);
+
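+		/* Pack up to four MPLS labels into two 8-byte slabs; only the
+		 * innermost (last) label is built with the bottom-of-stack
+		 * flag set. */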
+ switch (n_labels) {
+ case 1:
+ entry_arp0.slab[0] = 0;
+ entry_arp0.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64(
+ MPLS_LABEL(label0, 0, 1, 0));
+ entry_arp0.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 8;
+ break;
+
+ case 2:
+ entry_arp0.slab[0] = 0;
+ entry_arp0.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64(
+ (MPLS_LABEL(label0, 0, 0, 0) << 32) |
+ MPLS_LABEL(label1, 0, 1, 0));
+ entry_arp0.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 8;
+ break;
+
+ case 3:
+ entry_arp0.slab[0] = rte_bswap64(
+ (MPLS_LABEL(label1, 0, 0, 0) << 32) |
+ MPLS_LABEL(label2, 0, 1, 0));
+ entry_arp0.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64(
+ MPLS_LABEL(label0, 0, 0, 0));
+ entry_arp0.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 2 * 8;
+ break;
+
+ case 4:
+ entry_arp0.slab[0] = rte_bswap64(
+ (MPLS_LABEL(label2, 0, 0, 0) << 32) |
+ MPLS_LABEL(label3, 0, 1, 0));
+ entry_arp0.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp0.slab[1] = rte_bswap64(
+ (MPLS_LABEL(label0, 0, 0, 0) << 32) |
+ MPLS_LABEL(label1, 0, 0, 0));
+ entry_arp0.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 2 * 8;
+ break;
+
+ default:
+ rsp->status = -1;
+ return rsp;
+ }
+
+ entry_arp0.slab[2] = rte_bswap64((macaddr_src << 16) |
+ ethertype_mpls);
+ entry_arp0.slab_offset[2] = p_rt->params.ip_hdr_offset -
+ (n_labels * 4 + 8);
+
+ entry_arp0.slab[3] = rte_bswap64(macaddr_dst);
+ entry_arp0.slab_offset[3] = p_rt->params.ip_hdr_offset -
+ (n_labels * 4 + 2 * 8);
+
+ entry_arp0.data_offset = entry_arp0.slab_offset[3] + 2
+ - sizeof(struct rte_mbuf);
+ entry_arp0.ether_l2_length = n_labels * 4 + 14;
+ }
+
+ /* Ether MPLS - ARP on */
+ if ((p_rt->params.encap == PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS) &&
+ p_rt->params.n_arp_entries) {
+ uint64_t macaddr_src = MAC_SRC_DEFAULT;
+ uint64_t ethertype_mpls = 0x8847;
+
+ uint64_t label0 = req->data.l2.mpls.labels[0];
+ uint64_t label1 = req->data.l2.mpls.labels[1];
+ uint64_t label2 = req->data.l2.mpls.labels[2];
+ uint64_t label3 = req->data.l2.mpls.labels[3];
+ uint32_t n_labels = req->data.l2.mpls.n_labels;
+
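+		/* Same label packing as the ARP-off case; no destination MAC
+		 * is written here, since the entry chains to the ARP table. */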
+ switch (n_labels) {
+ case 1:
+ entry_arp1.slab[0] = 0;
+ entry_arp1.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.slab[1] = rte_bswap64(
+ MPLS_LABEL(label0, 0, 1, 0));
+ entry_arp1.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 8;
+ break;
+
+ case 2:
+ entry_arp1.slab[0] = 0;
+ entry_arp1.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.slab[1] = rte_bswap64(
+ (MPLS_LABEL(label0, 0, 0, 0) << 32) |
+ MPLS_LABEL(label1, 0, 1, 0));
+ entry_arp1.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 8;
+ break;
+
+ case 3:
+ entry_arp1.slab[0] = rte_bswap64(
+ (MPLS_LABEL(label1, 0, 0, 0) << 32) |
+ MPLS_LABEL(label2, 0, 1, 0));
+ entry_arp1.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.slab[1] = rte_bswap64(
+ MPLS_LABEL(label0, 0, 0, 0));
+ entry_arp1.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 2 * 8;
+ break;
+
+ case 4:
+ entry_arp1.slab[0] = rte_bswap64(
+ (MPLS_LABEL(label2, 0, 0, 0) << 32) |
+ MPLS_LABEL(label3, 0, 1, 0));
+ entry_arp1.slab_offset[0] =
+ p_rt->params.ip_hdr_offset - 8;
+
+ entry_arp1.slab[1] = rte_bswap64(
+ (MPLS_LABEL(label0, 0, 0, 0) << 32) |
+ MPLS_LABEL(label1, 0, 0, 0));
+ entry_arp1.slab_offset[1] =
+ p_rt->params.ip_hdr_offset - 2 * 8;
+ break;
+
+ default:
+ rsp->status = -1;
+ return rsp;
+ }
+
+ entry_arp1.slab[2] = rte_bswap64((macaddr_src << 16) |
+ ethertype_mpls);
+ entry_arp1.slab_offset[2] = p_rt->params.ip_hdr_offset -
+ (n_labels * 4 + 8);
+
+ entry_arp1.data_offset = entry_arp1.slab_offset[2] - 6
+ - sizeof(struct rte_mbuf);
+ entry_arp1.ether_l2_length = n_labels * 4 + 14;
+ }
+
+ rsp->status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[0],
+ &key,
+ entry,
+ &rsp->key_found,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_route_del_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing_route_delete_msg_req *req = msg;
+ struct pipeline_routing_route_delete_msg_rsp *rsp = msg;
+
+ struct rte_table_lpm_key key = {
+ .ip = req->key.key.ipv4.ip,
+ .depth = req->key.key.ipv4.depth,
+ };
+
+ if (req->key.type != PIPELINE_ROUTING_ROUTE_IPV4) {
+ rsp->status = -1;
+ return rsp;
+ }
+
+ rsp->status = rte_pipeline_table_entry_delete(p->p,
+ p->table_id[0],
+ &key,
+ &rsp->key_found,
+ NULL);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_route_add_default_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_routing_route_add_default_msg_req *req = msg;
+ struct pipeline_routing_route_add_default_msg_rsp *rsp = msg;
+
+ struct routing_table_entry default_entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+
+ .flags = 0,
+ .port_id = 0,
+ .ip = 0,
+ };
+
+ rsp->status = rte_pipeline_table_default_entry_add(p->p,
+ p->table_id[0],
+ (struct rte_pipeline_table_entry *) &default_entry,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_route_del_default_handler(struct pipeline *p,
+ void *msg)
+{
+ struct pipeline_routing_route_delete_default_msg_rsp *rsp = msg;
+
+ rsp->status = rte_pipeline_table_default_entry_delete(p->p,
+ p->table_id[0],
+ NULL);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_arp_add_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing_arp_add_msg_req *req = msg;
+ struct pipeline_routing_arp_add_msg_rsp *rsp = msg;
+
+ struct pipeline_routing_arp_key_ipv4 key = {
+ .port_id = req->key.key.ipv4.port_id,
+ .ip = rte_bswap32(req->key.key.ipv4.ip),
+ };
+
+ struct arp_table_entry entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->key.key.ipv4.port_id]},
+ },
+
+ .macaddr = 0, /* set below */
+ };
+
+ if (req->key.type != PIPELINE_ROUTING_ARP_IPV4) {
+ rsp->status = -1;
+ return rsp;
+ }
+
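+	/* Convert the 6-byte MAC address into the 64-bit layout expected by
+	 * the ARP table entry. */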
+ entry.macaddr = *((uint64_t *)&(req->macaddr));
+ entry.macaddr = entry.macaddr << 16;
+
+ rsp->status = rte_pipeline_table_entry_add(p->p,
+ p->table_id[1],
+ &key,
+ (struct rte_pipeline_table_entry *) &entry,
+ &rsp->key_found,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_arp_del_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing_arp_delete_msg_req *req = msg;
+ struct pipeline_routing_arp_delete_msg_rsp *rsp = msg;
+
+ struct pipeline_routing_arp_key_ipv4 key = {
+ .port_id = req->key.key.ipv4.port_id,
+ .ip = rte_bswap32(req->key.key.ipv4.ip),
+ };
+
+ if (req->key.type != PIPELINE_ROUTING_ARP_IPV4) {
+ rsp->status = -1;
+ return rsp;
+ }
+
+ rsp->status = rte_pipeline_table_entry_delete(p->p,
+ p->table_id[1],
+ &key,
+ &rsp->key_found,
+ NULL);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_arp_add_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing_arp_add_default_msg_req *req = msg;
+ struct pipeline_routing_arp_add_default_msg_rsp *rsp = msg;
+
+ struct arp_table_entry default_entry = {
+ .head = {
+ .action = RTE_PIPELINE_ACTION_PORT,
+ {.port_id = p->port_out_id[req->port_id]},
+ },
+
+ .macaddr = 0,
+ };
+
+ rsp->status = rte_pipeline_table_default_entry_add(p->p,
+ p->table_id[1],
+ (struct rte_pipeline_table_entry *) &default_entry,
+ (struct rte_pipeline_table_entry **) &rsp->entry_ptr);
+
+ return rsp;
+}
+
+void *
+pipeline_routing_msg_req_arp_del_default_handler(struct pipeline *p, void *msg)
+{
+ struct pipeline_routing_arp_delete_default_msg_rsp *rsp = msg;
+
+ rsp->status = rte_pipeline_table_default_entry_delete(p->p,
+ p->table_id[1],
+ NULL);
+
+ return rsp;
+}
+
+struct pipeline_be_ops pipeline_routing_be_ops = {
+ .f_init = pipeline_routing_init,
+ .f_free = pipeline_routing_free,
+ .f_run = NULL,
+ .f_timer = pipeline_routing_timer,
+ .f_track = pipeline_routing_track,
+};
diff --git a/examples/ip_pipeline/pipeline/pipeline_routing_be.h b/examples/ip_pipeline/pipeline/pipeline_routing_be.h
new file mode 100644
index 00000000..ec767b24
--- /dev/null
+++ b/examples/ip_pipeline/pipeline/pipeline_routing_be.h
@@ -0,0 +1,296 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_ROUTING_BE_H__
+#define __INCLUDE_PIPELINE_ROUTING_BE_H__
+
+#include <rte_ether.h>
+
+#include "pipeline_common_be.h"
+
+/*
+ * Pipeline argument parsing
+ */
+#ifndef PIPELINE_ROUTING_N_ROUTES_DEFAULT
+#define PIPELINE_ROUTING_N_ROUTES_DEFAULT 4096
+#endif
+
+enum pipeline_routing_encap {
+ PIPELINE_ROUTING_ENCAP_ETHERNET = 0,
+ PIPELINE_ROUTING_ENCAP_ETHERNET_QINQ,
+ PIPELINE_ROUTING_ENCAP_ETHERNET_MPLS,
+};
+
+struct pipeline_routing_params {
+ /* routing */
+ uint32_t n_routes;
+
+ /* routing packet encapsulation */
+ enum pipeline_routing_encap encap;
+ uint32_t qinq_sched;
+ uint32_t mpls_color_mark;
+
+ /* arp */
+ uint32_t n_arp_entries;
+
+ /* packet buffer offsets */
+ uint32_t ip_hdr_offset;
+ uint32_t arp_key_offset;
+ uint32_t color_offset;
+
+ /* debug */
+ uint32_t dbg_ah_disable;
+};
+
+int
+pipeline_routing_parse_args(struct pipeline_routing_params *p,
+ struct pipeline_params *params);
+
+/*
+ * Route
+ */
+enum pipeline_routing_route_key_type {
+ PIPELINE_ROUTING_ROUTE_IPV4,
+};
+
+struct pipeline_routing_route_key_ipv4 {
+ uint32_t ip;
+ uint32_t depth;
+};
+
+struct pipeline_routing_route_key {
+ enum pipeline_routing_route_key_type type;
+ union {
+ struct pipeline_routing_route_key_ipv4 ipv4;
+ } key;
+};
+
+enum pipeline_routing_route_flags {
+ PIPELINE_ROUTING_ROUTE_LOCAL = 1 << 0, /* 0 = remote; 1 = local */
+ PIPELINE_ROUTING_ROUTE_ARP = 1 << 1, /* 0 = ARP OFF; 1 = ARP ON */
+ PIPELINE_ROUTING_ROUTE_QINQ = 1 << 2, /* 0 = QINQ OFF; 1 = QINQ ON */
+ PIPELINE_ROUTING_ROUTE_MPLS = 1 << 3, /* 0 = MPLS OFF; 1 = MPLS ON */
+};
+
+#define PIPELINE_ROUTING_MPLS_LABELS_MAX 4
+
+struct pipeline_routing_route_data {
+ uint32_t flags;
+ uint32_t port_id; /* Output port ID */
+
+ union {
+ /* Next hop IP (valid only when ARP is enabled) */
+ uint32_t ip;
+
+		/* Next hop MAC address (valid only when ARP is disabled) */
+ struct ether_addr macaddr;
+ } ethernet;
+
+ union {
+ struct {
+ uint16_t svlan;
+ uint16_t cvlan;
+ } qinq;
+
+ struct {
+ uint32_t labels[PIPELINE_ROUTING_MPLS_LABELS_MAX];
+ uint32_t n_labels;
+ } mpls;
+ } l2;
+};
+
+/*
+ * ARP
+ */
+enum pipeline_routing_arp_key_type {
+ PIPELINE_ROUTING_ARP_IPV4,
+};
+
+struct pipeline_routing_arp_key_ipv4 {
+ uint32_t port_id;
+ uint32_t ip;
+};
+
+struct pipeline_routing_arp_key {
+ enum pipeline_routing_arp_key_type type;
+ union {
+ struct pipeline_routing_arp_key_ipv4 ipv4;
+ } key;
+};
+
+/*
+ * Messages
+ */
+enum pipeline_routing_msg_req_type {
+ PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD,
+ PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL,
+ PIPELINE_ROUTING_MSG_REQ_ROUTE_ADD_DEFAULT,
+ PIPELINE_ROUTING_MSG_REQ_ROUTE_DEL_DEFAULT,
+ PIPELINE_ROUTING_MSG_REQ_ARP_ADD,
+ PIPELINE_ROUTING_MSG_REQ_ARP_DEL,
+ PIPELINE_ROUTING_MSG_REQ_ARP_ADD_DEFAULT,
+ PIPELINE_ROUTING_MSG_REQ_ARP_DEL_DEFAULT,
+ PIPELINE_ROUTING_MSG_REQS
+};
+
+/*
+ * MSG ROUTE ADD
+ */
+struct pipeline_routing_route_add_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_routing_route_key key;
+
+ /* data */
+ struct pipeline_routing_route_data data;
+};
+
+struct pipeline_routing_route_add_msg_rsp {
+ int status;
+ int key_found;
+ void *entry_ptr;
+};
+
+/*
+ * MSG ROUTE DELETE
+ */
+struct pipeline_routing_route_delete_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_routing_route_key key;
+};
+
+struct pipeline_routing_route_delete_msg_rsp {
+ int status;
+ int key_found;
+};
+
+/*
+ * MSG ROUTE ADD DEFAULT
+ */
+struct pipeline_routing_route_add_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* data */
+ uint32_t port_id;
+};
+
+struct pipeline_routing_route_add_default_msg_rsp {
+ int status;
+ void *entry_ptr;
+};
+
+/*
+ * MSG ROUTE DELETE DEFAULT
+ */
+struct pipeline_routing_route_delete_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+};
+
+struct pipeline_routing_route_delete_default_msg_rsp {
+ int status;
+};
+
+/*
+ * MSG ARP ADD
+ */
+struct pipeline_routing_arp_add_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_routing_arp_key key;
+
+ /* data */
+ struct ether_addr macaddr;
+};
+
+struct pipeline_routing_arp_add_msg_rsp {
+ int status;
+ int key_found;
+ void *entry_ptr;
+};
+
+/*
+ * MSG ARP DELETE
+ */
+struct pipeline_routing_arp_delete_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* key */
+ struct pipeline_routing_arp_key key;
+};
+
+struct pipeline_routing_arp_delete_msg_rsp {
+ int status;
+ int key_found;
+};
+
+/*
+ * MSG ARP ADD DEFAULT
+ */
+struct pipeline_routing_arp_add_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+
+ /* data */
+ uint32_t port_id;
+};
+
+struct pipeline_routing_arp_add_default_msg_rsp {
+ int status;
+ void *entry_ptr;
+};
+
+/*
+ * MSG ARP DELETE DEFAULT
+ */
+struct pipeline_routing_arp_delete_default_msg_req {
+ enum pipeline_msg_req_type type;
+ enum pipeline_routing_msg_req_type subtype;
+};
+
+struct pipeline_routing_arp_delete_default_msg_rsp {
+ int status;
+};
+
+extern struct pipeline_be_ops pipeline_routing_be_ops;
+
+#endif
diff --git a/examples/ip_pipeline/pipeline_be.h b/examples/ip_pipeline/pipeline_be.h
new file mode 100644
index 00000000..f4ff262e
--- /dev/null
+++ b/examples/ip_pipeline/pipeline_be.h
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_PIPELINE_BE_H__
+#define __INCLUDE_PIPELINE_BE_H__
+
+#include <rte_port_ethdev.h>
+#include <rte_port_ring.h>
+#include <rte_port_frag.h>
+#include <rte_port_ras.h>
+#include <rte_port_sched.h>
+#include <rte_port_source_sink.h>
+#include <rte_pipeline.h>
+
+enum pipeline_port_in_type {
+ PIPELINE_PORT_IN_ETHDEV_READER,
+ PIPELINE_PORT_IN_RING_READER,
+ PIPELINE_PORT_IN_RING_MULTI_READER,
+ PIPELINE_PORT_IN_RING_READER_IPV4_FRAG,
+ PIPELINE_PORT_IN_RING_READER_IPV6_FRAG,
+ PIPELINE_PORT_IN_SCHED_READER,
+ PIPELINE_PORT_IN_SOURCE,
+};
+
+struct pipeline_port_in_params {
+ enum pipeline_port_in_type type;
+ union {
+ struct rte_port_ethdev_reader_params ethdev;
+ struct rte_port_ring_reader_params ring;
+ struct rte_port_ring_multi_reader_params ring_multi;
+ struct rte_port_ring_reader_ipv4_frag_params ring_ipv4_frag;
+ struct rte_port_ring_reader_ipv6_frag_params ring_ipv6_frag;
+ struct rte_port_sched_reader_params sched;
+ struct rte_port_source_params source;
+ } params;
+ uint32_t burst_size;
+};
+
+static inline void *
+pipeline_port_in_params_convert(struct pipeline_port_in_params *p)
+{
+ switch (p->type) {
+ case PIPELINE_PORT_IN_ETHDEV_READER:
+ return (void *) &p->params.ethdev;
+ case PIPELINE_PORT_IN_RING_READER:
+ return (void *) &p->params.ring;
+ case PIPELINE_PORT_IN_RING_MULTI_READER:
+ return (void *) &p->params.ring_multi;
+ case PIPELINE_PORT_IN_RING_READER_IPV4_FRAG:
+ return (void *) &p->params.ring_ipv4_frag;
+ case PIPELINE_PORT_IN_RING_READER_IPV6_FRAG:
+ return (void *) &p->params.ring_ipv6_frag;
+ case PIPELINE_PORT_IN_SCHED_READER:
+ return (void *) &p->params.sched;
+ case PIPELINE_PORT_IN_SOURCE:
+ return (void *) &p->params.source;
+ default:
+ return NULL;
+ }
+}
+
+static inline struct rte_port_in_ops *
+pipeline_port_in_params_get_ops(struct pipeline_port_in_params *p)
+{
+ switch (p->type) {
+ case PIPELINE_PORT_IN_ETHDEV_READER:
+ return &rte_port_ethdev_reader_ops;
+ case PIPELINE_PORT_IN_RING_READER:
+ return &rte_port_ring_reader_ops;
+ case PIPELINE_PORT_IN_RING_MULTI_READER:
+ return &rte_port_ring_multi_reader_ops;
+ case PIPELINE_PORT_IN_RING_READER_IPV4_FRAG:
+ return &rte_port_ring_reader_ipv4_frag_ops;
+ case PIPELINE_PORT_IN_RING_READER_IPV6_FRAG:
+ return &rte_port_ring_reader_ipv6_frag_ops;
+ case PIPELINE_PORT_IN_SCHED_READER:
+ return &rte_port_sched_reader_ops;
+ case PIPELINE_PORT_IN_SOURCE:
+ return &rte_port_source_ops;
+ default:
+ return NULL;
+ }
+}
+
+enum pipeline_port_out_type {
+ PIPELINE_PORT_OUT_ETHDEV_WRITER,
+ PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP,
+ PIPELINE_PORT_OUT_RING_WRITER,
+ PIPELINE_PORT_OUT_RING_MULTI_WRITER,
+ PIPELINE_PORT_OUT_RING_WRITER_NODROP,
+ PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP,
+ PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS,
+ PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS,
+ PIPELINE_PORT_OUT_SCHED_WRITER,
+ PIPELINE_PORT_OUT_SINK,
+};
+
+struct pipeline_port_out_params {
+ enum pipeline_port_out_type type;
+ union {
+ struct rte_port_ethdev_writer_params ethdev;
+ struct rte_port_ethdev_writer_nodrop_params ethdev_nodrop;
+ struct rte_port_ring_writer_params ring;
+ struct rte_port_ring_multi_writer_params ring_multi;
+ struct rte_port_ring_writer_nodrop_params ring_nodrop;
+ struct rte_port_ring_multi_writer_nodrop_params ring_multi_nodrop;
+ struct rte_port_ring_writer_ipv4_ras_params ring_ipv4_ras;
+ struct rte_port_ring_writer_ipv6_ras_params ring_ipv6_ras;
+ struct rte_port_sched_writer_params sched;
+ struct rte_port_sink_params sink;
+ } params;
+};
+
+static inline void *
+pipeline_port_out_params_convert(struct pipeline_port_out_params *p)
+{
+ switch (p->type) {
+ case PIPELINE_PORT_OUT_ETHDEV_WRITER:
+ return (void *) &p->params.ethdev;
+ case PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP:
+ return (void *) &p->params.ethdev_nodrop;
+ case PIPELINE_PORT_OUT_RING_WRITER:
+ return (void *) &p->params.ring;
+ case PIPELINE_PORT_OUT_RING_MULTI_WRITER:
+ return (void *) &p->params.ring_multi;
+ case PIPELINE_PORT_OUT_RING_WRITER_NODROP:
+ return (void *) &p->params.ring_nodrop;
+ case PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP:
+ return (void *) &p->params.ring_multi_nodrop;
+ case PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS:
+ return (void *) &p->params.ring_ipv4_ras;
+ case PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS:
+ return (void *) &p->params.ring_ipv6_ras;
+ case PIPELINE_PORT_OUT_SCHED_WRITER:
+ return (void *) &p->params.sched;
+ case PIPELINE_PORT_OUT_SINK:
+ return (void *) &p->params.sink;
+ default:
+ return NULL;
+ }
+}
+
+static inline void *
+pipeline_port_out_params_get_ops(struct pipeline_port_out_params *p)
+{
+ switch (p->type) {
+ case PIPELINE_PORT_OUT_ETHDEV_WRITER:
+ return &rte_port_ethdev_writer_ops;
+ case PIPELINE_PORT_OUT_ETHDEV_WRITER_NODROP:
+ return &rte_port_ethdev_writer_nodrop_ops;
+ case PIPELINE_PORT_OUT_RING_WRITER:
+ return &rte_port_ring_writer_ops;
+ case PIPELINE_PORT_OUT_RING_MULTI_WRITER:
+ return &rte_port_ring_multi_writer_ops;
+ case PIPELINE_PORT_OUT_RING_WRITER_NODROP:
+ return &rte_port_ring_writer_nodrop_ops;
+ case PIPELINE_PORT_OUT_RING_MULTI_WRITER_NODROP:
+ return &rte_port_ring_multi_writer_nodrop_ops;
+ case PIPELINE_PORT_OUT_RING_WRITER_IPV4_RAS:
+ return &rte_port_ring_writer_ipv4_ras_ops;
+ case PIPELINE_PORT_OUT_RING_WRITER_IPV6_RAS:
+ return &rte_port_ring_writer_ipv6_ras_ops;
+ case PIPELINE_PORT_OUT_SCHED_WRITER:
+ return &rte_port_sched_writer_ops;
+ case PIPELINE_PORT_OUT_SINK:
+ return &rte_port_sink_ops;
+ default:
+ return NULL;
+ }
+}
+
+#ifndef PIPELINE_NAME_SIZE
+#define PIPELINE_NAME_SIZE 32
+#endif
+
+#ifndef PIPELINE_MAX_PORT_IN
+#define PIPELINE_MAX_PORT_IN 16
+#endif
+
+#ifndef PIPELINE_MAX_PORT_OUT
+#define PIPELINE_MAX_PORT_OUT 16
+#endif
+
+#ifndef PIPELINE_MAX_TABLES
+#define PIPELINE_MAX_TABLES 16
+#endif
+
+#ifndef PIPELINE_MAX_MSGQ_IN
+#define PIPELINE_MAX_MSGQ_IN 16
+#endif
+
+#ifndef PIPELINE_MAX_MSGQ_OUT
+#define PIPELINE_MAX_MSGQ_OUT 16
+#endif
+
+#ifndef PIPELINE_MAX_ARGS
+#define PIPELINE_MAX_ARGS 32
+#endif
+
+struct pipeline_params {
+ char name[PIPELINE_NAME_SIZE];
+
+ struct pipeline_port_in_params port_in[PIPELINE_MAX_PORT_IN];
+ struct pipeline_port_out_params port_out[PIPELINE_MAX_PORT_OUT];
+ struct rte_ring *msgq_in[PIPELINE_MAX_MSGQ_IN];
+ struct rte_ring *msgq_out[PIPELINE_MAX_MSGQ_OUT];
+
+ uint32_t n_ports_in;
+ uint32_t n_ports_out;
+ uint32_t n_msgq;
+
+ int socket_id;
+
+ char *args_name[PIPELINE_MAX_ARGS];
+ char *args_value[PIPELINE_MAX_ARGS];
+ uint32_t n_args;
+
+ uint32_t log_level;
+};
+
+/*
+ * Pipeline type back-end operations
+ */
+
+typedef void* (*pipeline_be_op_init)(struct pipeline_params *params,
+ void *arg);
+
+typedef int (*pipeline_be_op_free)(void *pipeline);
+
+typedef int (*pipeline_be_op_run)(void *pipeline);
+
+typedef int (*pipeline_be_op_timer)(void *pipeline);
+
+typedef int (*pipeline_be_op_track)(void *pipeline,
+ uint32_t port_in,
+ uint32_t *port_out);
+
+struct pipeline_be_ops {
+ pipeline_be_op_init f_init;
+ pipeline_be_op_free f_free;
+ pipeline_be_op_run f_run;
+ pipeline_be_op_timer f_timer;
+ pipeline_be_op_track f_track;
+};
+
+/* Pipeline specific config parse error messages */
+#define PIPELINE_ARG_CHECK(exp, fmt, ...) \
+do { \
+ if (!(exp)) { \
+ fprintf(stderr, fmt "\n", ## __VA_ARGS__); \
+ return -1; \
+ } \
+} while (0)
+
+#define PIPELINE_PARSE_ERR_INV_VAL(exp, section, entry, val) \
+PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": entry \"%s\" " \
+ "has invalid value (\"%s\")", section, entry, val)
+
+#define PIPELINE_PARSE_ERR_OUT_RNG(exp, section, entry, val) \
+PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": entry \"%s\" " \
+ "value is out of range (\"%s\")", section, entry, val)
+
+#define PIPELINE_PARSE_ERR_DUPLICATE(exp, section, entry) \
+PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": duplicated " \
+ "entry \"%s\"", section, entry)
+
+#define PIPELINE_PARSE_ERR_INV_ENT(exp, section, entry) \
+PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": invalid entry " \
+ "\"%s\"", section, entry)
+
+#define PIPELINE_PARSE_ERR_MANDATORY(exp, section, entry) \
+PIPELINE_ARG_CHECK(exp, "Parse error in section \"%s\": mandatory " \
+ "entry \"%s\" is missing", section, entry)
+
+#endif
diff --git a/examples/ip_pipeline/thread.c b/examples/ip_pipeline/thread.c
new file mode 100644
index 00000000..a0f1f12f
--- /dev/null
+++ b/examples/ip_pipeline/thread.c
@@ -0,0 +1,322 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_pipeline.h>
+
+#include "pipeline_common_be.h"
+#include "app.h"
+#include "thread.h"
+
+#if APP_THREAD_HEADROOM_STATS_COLLECT
+
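+/* When a pipeline run processes no packets, the cycles it spent are counted
+ * as headroom (unused capacity) of the thread. */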
+#define PIPELINE_RUN_REGULAR(thread, pipeline) \
+do { \
+ uint64_t t0 = rte_rdtsc_precise(); \
+ int n_pkts = rte_pipeline_run(pipeline->p); \
+ \
+ if (n_pkts == 0) { \
+ uint64_t t1 = rte_rdtsc_precise(); \
+ \
+ thread->headroom_cycles += t1 - t0; \
+ } \
+} while (0)
+
+
+#define PIPELINE_RUN_CUSTOM(thread, data) \
+do { \
+ uint64_t t0 = rte_rdtsc_precise(); \
+ int n_pkts = data->f_run(data->be); \
+ \
+ if (n_pkts == 0) { \
+ uint64_t t1 = rte_rdtsc_precise(); \
+ \
+ thread->headroom_cycles += t1 - t0; \
+ } \
+} while (0)
+
+#else
+
+#define PIPELINE_RUN_REGULAR(thread, pipeline) \
+ rte_pipeline_run(pipeline->p)
+
+#define PIPELINE_RUN_CUSTOM(thread, data) \
+ data->f_run(data->be)
+
+#endif
+
+static inline void *
+thread_msg_recv(struct rte_ring *r)
+{
+ void *msg;
+ int status = rte_ring_sc_dequeue(r, &msg);
+
+ if (status != 0)
+ return NULL;
+
+ return msg;
+}
+
+static inline void
+thread_msg_send(struct rte_ring *r,
+ void *msg)
+{
+ int status;
+
+ do {
+ status = rte_ring_sp_enqueue(r, msg);
+ } while (status == -ENOBUFS);
+}
+
+static int
+thread_pipeline_enable(struct app_thread_data *t,
+ struct thread_pipeline_enable_msg_req *req)
+{
+ struct app_thread_pipeline_data *p;
+
+ if (req->f_run == NULL) {
+ if (t->n_regular >= APP_MAX_THREAD_PIPELINES)
+ return -1;
+ } else {
+ if (t->n_custom >= APP_MAX_THREAD_PIPELINES)
+ return -1;
+ }
+
+ p = (req->f_run == NULL) ?
+ &t->regular[t->n_regular] :
+ &t->custom[t->n_custom];
+
+ p->pipeline_id = req->pipeline_id;
+ p->be = req->be;
+ p->f_run = req->f_run;
+ p->f_timer = req->f_timer;
+ p->timer_period = req->timer_period;
+ p->deadline = 0;
+
+ if (req->f_run == NULL)
+ t->n_regular++;
+ else
+ t->n_custom++;
+
+ return 0;
+}
+
+static int
+thread_pipeline_disable(struct app_thread_data *t,
+ struct thread_pipeline_disable_msg_req *req)
+{
+ uint32_t n_regular = RTE_MIN(t->n_regular, RTE_DIM(t->regular));
+ uint32_t n_custom = RTE_MIN(t->n_custom, RTE_DIM(t->custom));
+ uint32_t i;
+
+ /* search regular pipelines of current thread */
+ for (i = 0; i < n_regular; i++) {
+ if (t->regular[i].pipeline_id != req->pipeline_id)
+ continue;
+
+ if (i < n_regular - 1)
+			memmove(&t->regular[i],
+ &t->regular[i+1],
+ (n_regular - 1 - i) * sizeof(struct app_thread_pipeline_data));
+
+ n_regular--;
+ t->n_regular = n_regular;
+
+ return 0;
+ }
+
+ /* search custom pipelines of current thread */
+ for (i = 0; i < n_custom; i++) {
+ if (t->custom[i].pipeline_id != req->pipeline_id)
+ continue;
+
+ if (i < n_custom - 1)
+			memmove(&t->custom[i],
+ &t->custom[i+1],
+ (n_custom - 1 - i) * sizeof(struct app_thread_pipeline_data));
+
+ n_custom--;
+ t->n_custom = n_custom;
+
+ return 0;
+ }
+
+ /* return if pipeline not found */
+ return -1;
+}
+
+static int
+thread_msg_req_handle(struct app_thread_data *t)
+{
+ void *msg_ptr;
+ struct thread_msg_req *req;
+ struct thread_msg_rsp *rsp;
+
+ msg_ptr = thread_msg_recv(t->msgq_in);
+ req = msg_ptr;
+ rsp = msg_ptr;
+
+ if (req != NULL)
+ switch (req->type) {
+ case THREAD_MSG_REQ_PIPELINE_ENABLE: {
+ rsp->status = thread_pipeline_enable(t,
+ (struct thread_pipeline_enable_msg_req *) req);
+ thread_msg_send(t->msgq_out, rsp);
+ break;
+ }
+
+ case THREAD_MSG_REQ_PIPELINE_DISABLE: {
+ rsp->status = thread_pipeline_disable(t,
+ (struct thread_pipeline_disable_msg_req *) req);
+ thread_msg_send(t->msgq_out, rsp);
+ break;
+ }
+
+ case THREAD_MSG_REQ_HEADROOM_READ: {
+ struct thread_headroom_read_msg_rsp *rsp =
+ (struct thread_headroom_read_msg_rsp *)
+ req;
+
+ rsp->headroom_ratio = t->headroom_ratio;
+ rsp->status = 0;
+ thread_msg_send(t->msgq_out, rsp);
+ break;
+ }
+ default:
+ break;
+ }
+
+ return 0;
+}
+
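+/* Headroom ratio = cycles spent in empty pipeline runs divided by the cycles
+ * elapsed since the previous update. */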
+static void
+thread_headroom_update(struct app_thread_data *t, uint64_t time)
+{
+ uint64_t time_diff = time - t->headroom_time;
+
+ t->headroom_ratio =
+ ((double) t->headroom_cycles) / ((double) time_diff);
+
+ t->headroom_cycles = 0;
+ t->headroom_time = rte_rdtsc_precise();
+}
+
+int
+app_thread(void *arg)
+{
+ struct app_params *app = (struct app_params *) arg;
+ uint32_t core_id = rte_lcore_id(), i, j;
+ struct app_thread_data *t = &app->thread_data[core_id];
+
+ for (i = 0; ; i++) {
+ uint32_t n_regular = RTE_MIN(t->n_regular, RTE_DIM(t->regular));
+ uint32_t n_custom = RTE_MIN(t->n_custom, RTE_DIM(t->custom));
+
+ /* Run regular pipelines */
+ for (j = 0; j < n_regular; j++) {
+ struct app_thread_pipeline_data *data = &t->regular[j];
+ struct pipeline *p = data->be;
+
+ PIPELINE_RUN_REGULAR(t, p);
+ }
+
+ /* Run custom pipelines */
+ for (j = 0; j < n_custom; j++) {
+ struct app_thread_pipeline_data *data = &t->custom[j];
+
+ PIPELINE_RUN_CUSTOM(t, data);
+ }
+
+		/* Timer: check pipeline and thread timers only every 16th iteration */
+ if ((i & 0xF) == 0) {
+ uint64_t time = rte_get_tsc_cycles();
+ uint64_t t_deadline = UINT64_MAX;
+
+ if (time < t->deadline)
+ continue;
+
+ /* Timer for regular pipelines */
+ for (j = 0; j < n_regular; j++) {
+ struct app_thread_pipeline_data *data =
+ &t->regular[j];
+ uint64_t p_deadline = data->deadline;
+
+ if (p_deadline <= time) {
+ data->f_timer(data->be);
+ p_deadline = time + data->timer_period;
+ data->deadline = p_deadline;
+ }
+
+ if (p_deadline < t_deadline)
+ t_deadline = p_deadline;
+ }
+
+ /* Timer for custom pipelines */
+ for (j = 0; j < n_custom; j++) {
+ struct app_thread_pipeline_data *data =
+ &t->custom[j];
+ uint64_t p_deadline = data->deadline;
+
+ if (p_deadline <= time) {
+ data->f_timer(data->be);
+ p_deadline = time + data->timer_period;
+ data->deadline = p_deadline;
+ }
+
+ if (p_deadline < t_deadline)
+ t_deadline = p_deadline;
+ }
+
+ /* Timer for thread message request */
+ {
+ uint64_t deadline = t->thread_req_deadline;
+
+ if (deadline <= time) {
+ thread_msg_req_handle(t);
+ thread_headroom_update(t, time);
+ deadline = time + t->timer_period;
+ t->thread_req_deadline = deadline;
+ }
+
+ if (deadline < t_deadline)
+ t_deadline = deadline;
+ }
+
+ t->deadline = t_deadline;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/thread.h b/examples/ip_pipeline/thread.h
new file mode 100644
index 00000000..e52b22e6
--- /dev/null
+++ b/examples/ip_pipeline/thread.h
@@ -0,0 +1,98 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef THREAD_H_
+#define THREAD_H_
+
+#include "app.h"
+#include "pipeline_be.h"
+
+enum thread_msg_req_type {
+ THREAD_MSG_REQ_PIPELINE_ENABLE = 0,
+ THREAD_MSG_REQ_PIPELINE_DISABLE,
+ THREAD_MSG_REQ_HEADROOM_READ,
+ THREAD_MSG_REQS
+};
+
+struct thread_msg_req {
+ enum thread_msg_req_type type;
+};
+
+struct thread_msg_rsp {
+ int status;
+};
+
+/*
+ * PIPELINE ENABLE
+ */
+struct thread_pipeline_enable_msg_req {
+ enum thread_msg_req_type type;
+
+ uint32_t pipeline_id;
+ void *be;
+ pipeline_be_op_run f_run;
+ pipeline_be_op_timer f_timer;
+ uint64_t timer_period;
+};
+
+struct thread_pipeline_enable_msg_rsp {
+ int status;
+};
+
+/*
+ * PIPELINE DISABLE
+ */
+struct thread_pipeline_disable_msg_req {
+ enum thread_msg_req_type type;
+
+ uint32_t pipeline_id;
+};
+
+struct thread_pipeline_disable_msg_rsp {
+ int status;
+};
+
+/*
+ * THREAD HEADROOM
+ */
+struct thread_headroom_read_msg_req {
+ enum thread_msg_req_type type;
+};
+
+struct thread_headroom_read_msg_rsp {
+ int status;
+
+ double headroom_ratio;
+};
+
+#endif /* THREAD_H_ */
diff --git a/examples/ip_pipeline/thread_fe.c b/examples/ip_pipeline/thread_fe.c
new file mode 100644
index 00000000..4a435f7c
--- /dev/null
+++ b/examples/ip_pipeline/thread_fe.c
@@ -0,0 +1,461 @@
+#include <rte_common.h>
+#include <rte_ring.h>
+#include <rte_malloc.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_ipaddr.h>
+#include <cmdline_parse_etheraddr.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "thread.h"
+#include "thread_fe.h"
+#include "pipeline.h"
+#include "pipeline_common_fe.h"
+#include "app.h"
+
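+/* Send a request on the thread's input message ring and busy-wait for the
+ * response on its output ring; a timeout_ms of zero means wait forever. */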
+static inline void *
+thread_msg_send_recv(struct app_params *app,
+ uint32_t socket_id, uint32_t core_id, uint32_t ht_id,
+ void *msg,
+ uint32_t timeout_ms)
+{
+ struct rte_ring *r_req = app_thread_msgq_in_get(app,
+ socket_id, core_id, ht_id);
+ struct rte_ring *r_rsp = app_thread_msgq_out_get(app,
+ socket_id, core_id, ht_id);
+ uint64_t hz = rte_get_tsc_hz();
+ void *msg_recv;
+ uint64_t deadline;
+ int status;
+
+ /* send */
+ do {
+ status = rte_ring_sp_enqueue(r_req, (void *) msg);
+ } while (status == -ENOBUFS);
+
+ /* recv */
+ deadline = (timeout_ms) ?
+ (rte_rdtsc() + ((hz * timeout_ms) / 1000)) :
+ UINT64_MAX;
+
+ do {
+ if (rte_rdtsc() > deadline)
+ return NULL;
+
+ status = rte_ring_sc_dequeue(r_rsp, &msg_recv);
+ } while (status != 0);
+
+ return msg_recv;
+}
+
+int
+app_pipeline_enable(struct app_params *app,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t hyper_th_id,
+ uint32_t pipeline_id)
+{
+ struct thread_pipeline_enable_msg_req *req;
+ struct thread_pipeline_enable_msg_rsp *rsp;
+ int thread_id;
+ struct app_pipeline_data *p;
+ struct app_pipeline_params *p_params;
+ struct pipeline_type *p_type;
+ int status;
+
+ if (app == NULL)
+ return -1;
+
+ thread_id = cpu_core_map_get_lcore_id(app->core_map,
+ socket_id,
+ core_id,
+ hyper_th_id);
+
+ if ((thread_id < 0) ||
+ ((app->core_mask & (1LLU << thread_id)) == 0))
+ return -1;
+
+ if (app_pipeline_data(app, pipeline_id) == NULL)
+ return -1;
+
+ p = &app->pipeline_data[pipeline_id];
+ p_params = &app->pipeline_params[pipeline_id];
+ p_type = app_pipeline_type_find(app, p_params->type);
+
+ if (p->enabled == 1)
+ return -1;
+
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = THREAD_MSG_REQ_PIPELINE_ENABLE;
+ req->pipeline_id = pipeline_id;
+ req->be = p->be;
+ req->f_run = p_type->be_ops->f_run;
+ req->f_timer = p_type->be_ops->f_timer;
+ req->timer_period = p->timer_period;
+
+ rsp = thread_msg_send_recv(app,
+ socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT);
+ if (rsp == NULL)
+ return -1;
+
+ status = rsp->status;
+ app_msg_free(app, rsp);
+
+ if (status != 0)
+ return -1;
+
+ p->enabled = 1;
+ return 0;
+}
+
+int
+app_pipeline_disable(struct app_params *app,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t hyper_th_id,
+ uint32_t pipeline_id)
+{
+ struct thread_pipeline_disable_msg_req *req;
+ struct thread_pipeline_disable_msg_rsp *rsp;
+ int thread_id;
+ struct app_pipeline_data *p;
+ int status;
+
+ if (app == NULL)
+ return -1;
+
+ thread_id = cpu_core_map_get_lcore_id(app->core_map,
+ socket_id,
+ core_id,
+ hyper_th_id);
+
+ if ((thread_id < 0) ||
+ ((app->core_mask & (1LLU << thread_id)) == 0))
+ return -1;
+
+ if (app_pipeline_data(app, pipeline_id) == NULL)
+ return -1;
+
+ p = &app->pipeline_data[pipeline_id];
+
+ if (p->enabled == 0)
+ return -1;
+
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = THREAD_MSG_REQ_PIPELINE_DISABLE;
+ req->pipeline_id = pipeline_id;
+
+ rsp = thread_msg_send_recv(app,
+ socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT);
+
+ if (rsp == NULL)
+ return -1;
+
+ status = rsp->status;
+ app_msg_free(app, rsp);
+
+ if (status != 0)
+ return -1;
+
+ p->enabled = 0;
+ return 0;
+}
+
+int
+app_thread_headroom(struct app_params *app,
+ uint32_t socket_id,
+ uint32_t core_id,
+ uint32_t hyper_th_id)
+{
+ struct thread_headroom_read_msg_req *req;
+ struct thread_headroom_read_msg_rsp *rsp;
+ int thread_id;
+ int status;
+
+ if (app == NULL)
+ return -1;
+
+ thread_id = cpu_core_map_get_lcore_id(app->core_map,
+ socket_id,
+ core_id,
+ hyper_th_id);
+
+ if ((thread_id < 0) ||
+ ((app->core_mask & (1LLU << thread_id)) == 0))
+ return -1;
+
+ req = app_msg_alloc(app);
+ if (req == NULL)
+ return -1;
+
+ req->type = THREAD_MSG_REQ_HEADROOM_READ;
+
+ rsp = thread_msg_send_recv(app,
+ socket_id, core_id, hyper_th_id, req, MSG_TIMEOUT_DEFAULT);
+
+ if (rsp == NULL)
+ return -1;
+
+ status = rsp->status;
+
+ if (status != 0)
+ return -1;
+
+ printf("%.3f%%\n", rsp->headroom_ratio * 100);
+
+ app_msg_free(app, rsp);
+
+ return 0;
+}
+
+/*
+ * pipeline enable
+ */
+
+struct cmd_pipeline_enable_result {
+ cmdline_fixed_string_t t_string;
+ cmdline_fixed_string_t t_id_string;
+ cmdline_fixed_string_t pipeline_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t enable_string;
+};
+
+static void
+cmd_pipeline_enable_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_pipeline_enable_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+ uint32_t core_id, socket_id, hyper_th_id;
+
+ if (parse_pipeline_core(&socket_id,
+ &core_id,
+ &hyper_th_id,
+ params->t_id_string) != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ status = app_pipeline_enable(app,
+ socket_id,
+ core_id,
+ hyper_th_id,
+ params->pipeline_id);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_pipeline_enable_t_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, t_string, "t");
+
+cmdline_parse_token_string_t cmd_pipeline_enable_t_id_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, t_id_string,
+ NULL);
+
+cmdline_parse_token_string_t cmd_pipeline_enable_pipeline_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, pipeline_string,
+ "pipeline");
+
+cmdline_parse_token_num_t cmd_pipeline_enable_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_pipeline_enable_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_pipeline_enable_enable_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_enable_result, enable_string,
+ "enable");
+
+cmdline_parse_inst_t cmd_pipeline_enable = {
+ .f = cmd_pipeline_enable_parsed,
+ .data = NULL,
+ .help_str = "Enable pipeline on specified core",
+ .tokens = {
+ (void *)&cmd_pipeline_enable_t_string,
+ (void *)&cmd_pipeline_enable_t_id_string,
+ (void *)&cmd_pipeline_enable_pipeline_string,
+ (void *)&cmd_pipeline_enable_pipeline_id,
+ (void *)&cmd_pipeline_enable_enable_string,
+ NULL,
+ },
+};
+
+/*
+ * pipeline disable
+ */
+
+struct cmd_pipeline_disable_result {
+ cmdline_fixed_string_t t_string;
+ cmdline_fixed_string_t t_id_string;
+ cmdline_fixed_string_t pipeline_string;
+ uint32_t pipeline_id;
+ cmdline_fixed_string_t disable_string;
+};
+
+static void
+cmd_pipeline_disable_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_pipeline_disable_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+ uint32_t core_id, socket_id, hyper_th_id;
+
+ if (parse_pipeline_core(&socket_id,
+ &core_id,
+ &hyper_th_id,
+ params->t_id_string) != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ status = app_pipeline_disable(app,
+ socket_id,
+ core_id,
+ hyper_th_id,
+ params->pipeline_id);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_pipeline_disable_t_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, t_string, "t");
+
+cmdline_parse_token_string_t cmd_pipeline_disable_t_id_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, t_id_string,
+ NULL);
+
+cmdline_parse_token_string_t cmd_pipeline_disable_pipeline_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result,
+ pipeline_string, "pipeline");
+
+cmdline_parse_token_num_t cmd_pipeline_disable_pipeline_id =
+ TOKEN_NUM_INITIALIZER(struct cmd_pipeline_disable_result, pipeline_id,
+ UINT32);
+
+cmdline_parse_token_string_t cmd_pipeline_disable_disable_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipeline_disable_result, disable_string,
+ "disable");
+
+cmdline_parse_inst_t cmd_pipeline_disable = {
+ .f = cmd_pipeline_disable_parsed,
+ .data = NULL,
+ .help_str = "Disable pipeline on specified core",
+ .tokens = {
+ (void *)&cmd_pipeline_disable_t_string,
+ (void *)&cmd_pipeline_disable_t_id_string,
+ (void *)&cmd_pipeline_disable_pipeline_string,
+ (void *)&cmd_pipeline_disable_pipeline_id,
+ (void *)&cmd_pipeline_disable_disable_string,
+ NULL,
+ },
+};
+
+
+/*
+ * thread headroom
+ */
+
+struct cmd_thread_headroom_result {
+ cmdline_fixed_string_t t_string;
+ cmdline_fixed_string_t t_id_string;
+ cmdline_fixed_string_t headroom_string;
+};
+
+static void
+cmd_thread_headroom_parsed(
+ void *parsed_result,
+ __rte_unused struct cmdline *cl,
+ void *data)
+{
+ struct cmd_thread_headroom_result *params = parsed_result;
+ struct app_params *app = data;
+ int status;
+ uint32_t core_id, socket_id, hyper_th_id;
+
+ if (parse_pipeline_core(&socket_id,
+ &core_id,
+ &hyper_th_id,
+ params->t_id_string) != 0) {
+ printf("Command failed\n");
+ return;
+ }
+
+ status = app_thread_headroom(app,
+ socket_id,
+ core_id,
+ hyper_th_id);
+
+ if (status != 0)
+ printf("Command failed\n");
+}
+
+cmdline_parse_token_string_t cmd_thread_headroom_t_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result,
+ t_string, "t");
+
+cmdline_parse_token_string_t cmd_thread_headroom_t_id_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result,
+ t_id_string, NULL);
+
+cmdline_parse_token_string_t cmd_thread_headroom_headroom_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_thread_headroom_result,
+ headroom_string, "headroom");
+
+cmdline_parse_inst_t cmd_thread_headroom = {
+ .f = cmd_thread_headroom_parsed,
+ .data = NULL,
+ .help_str = "Display thread headroom",
+ .tokens = {
+ (void *)&cmd_thread_headroom_t_string,
+ (void *)&cmd_thread_headroom_t_id_string,
+ (void *)&cmd_thread_headroom_headroom_string,
+ NULL,
+ },
+};
+
+
+static cmdline_parse_ctx_t thread_cmds[] = {
+ (cmdline_parse_inst_t *) &cmd_pipeline_enable,
+ (cmdline_parse_inst_t *) &cmd_pipeline_disable,
+ (cmdline_parse_inst_t *) &cmd_thread_headroom,
+ NULL,
+};
+
+int
+app_pipeline_thread_cmd_push(struct app_params *app)
+{
+ uint32_t n_cmds, i;
+
+ /* Check for available slots in the application commands array */
+ n_cmds = RTE_DIM(thread_cmds) - 1;
+ if (n_cmds > APP_MAX_CMDS - app->n_cmds)
+ return -ENOMEM;
+
+ /* Push thread commands into the application */
+ memcpy(&app->cmds[app->n_cmds], thread_cmds,
+ n_cmds * sizeof(cmdline_parse_ctx_t));
+
+ for (i = 0; i < n_cmds; i++)
+ app->cmds[app->n_cmds + i]->data = app;
+
+ app->n_cmds += n_cmds;
+ app->cmds[app->n_cmds] = NULL;
+
+ return 0;
+}
diff --git a/examples/ip_pipeline/thread_fe.h b/examples/ip_pipeline/thread_fe.h
new file mode 100644
index 00000000..2fd4ee8e
--- /dev/null
+++ b/examples/ip_pipeline/thread_fe.h
@@ -0,0 +1,101 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef THREAD_FE_H_
+#define THREAD_FE_H_
+
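+/* Look up the request message queue of the thread running on the given
+ * socket/core/hyper-thread, using the MSGQ-REQ-CORE-s<socket>c<core>[h]
+ * naming convention for per-thread message queues. */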
+static inline struct rte_ring *
+app_thread_msgq_in_get(struct app_params *app,
+ uint32_t socket_id, uint32_t core_id, uint32_t ht_id)
+{
+ char msgq_name[32];
+ ssize_t param_idx;
+
+ snprintf(msgq_name, sizeof(msgq_name),
+ "MSGQ-REQ-CORE-s%" PRIu32 "c%" PRIu32 "%s",
+ socket_id,
+ core_id,
+ (ht_id) ? "h" : "");
+ param_idx = APP_PARAM_FIND(app->msgq_params, msgq_name);
+
+ if (param_idx < 0)
+ return NULL;
+
+ return app->msgq[param_idx];
+}
+
+static inline struct rte_ring *
+app_thread_msgq_out_get(struct app_params *app,
+ uint32_t socket_id, uint32_t core_id, uint32_t ht_id)
+{
+ char msgq_name[32];
+ ssize_t param_idx;
+
+ snprintf(msgq_name, sizeof(msgq_name),
+ "MSGQ-RSP-CORE-s%" PRIu32 "c%" PRIu32 "%s",
+ socket_id,
+ core_id,
+ (ht_id) ? "h" : "");
+ param_idx = APP_PARAM_FIND(app->msgq_params, msgq_name);
+
+ if (param_idx < 0)
+ return NULL;
+
+ return app->msgq[param_idx];
+}
+
+int
+app_pipeline_thread_cmd_push(struct app_params *app);
+
+int
+app_pipeline_enable(struct app_params *app,
+	uint32_t socket_id,
+	uint32_t core_id,
+ uint32_t hyper_th_id,
+ uint32_t pipeline_id);
+
+int
+app_pipeline_disable(struct app_params *app,
+	uint32_t socket_id,
+	uint32_t core_id,
+ uint32_t hyper_th_id,
+ uint32_t pipeline_id);
+
+int
+app_thread_headroom(struct app_params *app,
+	uint32_t socket_id,
+	uint32_t core_id,
+ uint32_t hyper_th_id);
+
+#endif /* THREAD_FE_H_ */
diff --git a/examples/ip_reassembly/Makefile b/examples/ip_reassembly/Makefile
new file mode 100644
index 00000000..d9539a3a
--- /dev/null
+++ b/examples/ip_reassembly/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ip_reassembly
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ip_reassembly/main.c b/examples/ip_reassembly/main.c
new file mode 100644
index 00000000..c27e7353
--- /dev/null
+++ b/examples/ip_reassembly/main.c
@@ -0,0 +1,1185 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/param.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+
+#include <rte_ip_frag.h>
+
+#define MAX_PKT_BURST 32
+
+
+#define RTE_LOGTYPE_IP_RSMBL RTE_LOGTYPE_USER1
+
+#define MAX_JUMBO_PKT_LEN 9600
+
+#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM
+#define MBUF_SIZE \
+ (BUF_SIZE + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+
+#define NB_MBUF 8192
+
+/* allow max jumbo frame 9.5 KB */
+#define JUMBO_FRAME_MAX_SIZE 0x2600
+
+#define MAX_FLOW_NUM UINT16_MAX
+#define MIN_FLOW_NUM 1
+#define DEF_FLOW_NUM 0x1000
+
+/* TTL numbers are in ms. */
+#define MAX_FLOW_TTL (3600 * MS_PER_S)
+#define MIN_FLOW_TTL 1
+#define DEF_FLOW_TTL MS_PER_S
+
+#define MAX_FRAG_NUM RTE_LIBRTE_IP_FRAG_MAX_FRAG
+
+/* Should be power of two. */
+#define IP_FRAG_TBL_BUCKET_ENTRIES 16
+
+static uint32_t max_flow_num = DEF_FLOW_NUM;
+static uint32_t max_flow_ttl = DEF_FLOW_TTL;
+
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define NB_SOCKETS 8
+
+/* Configure how many packets ahead to prefetch when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+#ifndef IPv4_BYTES
+#define IPv4_BYTES_FMT "%" PRIu8 ".%" PRIu8 ".%" PRIu8 ".%" PRIu8
+#define IPv4_BYTES(addr) \
+ (uint8_t) (((addr) >> 24) & 0xFF),\
+ (uint8_t) (((addr) >> 16) & 0xFF),\
+ (uint8_t) (((addr) >> 8) & 0xFF),\
+ (uint8_t) ((addr) & 0xFF)
+#endif
+
+#ifndef IPv6_BYTES
+#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
+ "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+#define IPv6_BYTES(addr) \
+ addr[0], addr[1], addr[2], addr[3], \
+ addr[4], addr[5], addr[6], addr[7], \
+ addr[8], addr[9], addr[10], addr[11],\
+ addr[12], addr[13], addr[14], addr[15]
+#endif
+
+#define IPV6_ADDR_LEN 16
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+
+static int rx_queue_per_lcore = 1;
+
+struct mbuf_table {
+ uint32_t len;
+ uint32_t head;
+ uint32_t tail;
+ struct rte_mbuf *m_table[0];
+};
+
+struct rx_queue {
+ struct rte_ip_frag_tbl *frag_tbl;
+ struct rte_mempool *pool;
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ uint8_t portid;
+};
+
+struct tx_lcore_stat {
+ uint64_t call;
+ uint64_t drop;
+ uint64_t queue;
+ uint64_t send;
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+#define MAX_RX_QUEUE_PER_PORT 128
+
+struct lcore_queue_conf {
+ uint16_t n_rx_queue;
+ struct rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct rte_ip_frag_death_row death_row;
+ struct mbuf_table *tx_mbufs[RTE_MAX_ETHPORTS];
+ struct tx_lcore_stat tx_stat;
+} __rte_cache_aligned;
+static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 1, /**< Jumbo Frame Support enabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+/*
+ * IPv4 forwarding table
+ */
+struct l3fwd_ipv4_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+struct l3fwd_ipv4_route l3fwd_ipv4_route_array[] = {
+ {IPv4(100,10,0,0), 16, 0},
+ {IPv4(100,20,0,0), 16, 1},
+ {IPv4(100,30,0,0), 16, 2},
+ {IPv4(100,40,0,0), 16, 3},
+ {IPv4(100,50,0,0), 16, 4},
+ {IPv4(100,60,0,0), 16, 5},
+ {IPv4(100,70,0,0), 16, 6},
+ {IPv4(100,80,0,0), 16, 7},
+};
+
+/*
+ * IPv6 forwarding table
+ */
+
+struct l3fwd_ipv6_route {
+ uint8_t ip[IPV6_ADDR_LEN];
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct l3fwd_ipv6_route l3fwd_ipv6_route_array[] = {
+ {{1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 0},
+ {{2,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 1},
+ {{3,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 2},
+ {{4,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 3},
+ {{5,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 4},
+ {{6,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 5},
+ {{7,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 6},
+ {{8,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1}, 48, 7},
+};
+
+#define LPM_MAX_RULES 1024
+#define LPM6_MAX_RULES 1024
+#define LPM6_NUMBER_TBL8S (1 << 16)
+
+struct rte_lpm6_config lpm6_config = {
+ .max_rules = LPM6_MAX_RULES,
+ .number_tbl8s = LPM6_NUMBER_TBL8S,
+ .flags = 0
+};
+
+static struct rte_lpm *socket_lpm[RTE_MAX_NUMA_NODES];
+static struct rte_lpm6 *socket_lpm6[RTE_MAX_NUMA_NODES];
+
+#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT
+#define TX_LCORE_STAT_UPDATE(s, f, v) ((s)->f += (v))
+#else
+#define TX_LCORE_STAT_UPDATE(s, f, v) do {} while (0)
+#endif /* RTE_LIBRTE_IP_FRAG_TBL_STAT */
+
+/*
+ * If the number of queued packets reaches the given threshold, then
+ * send a burst of packets on the output interface.
+ */
+static inline uint32_t
+send_burst(struct lcore_queue_conf *qconf, uint32_t thresh, uint8_t port)
+{
+ uint32_t fill, len, k, n;
+ struct mbuf_table *txmb;
+
+ txmb = qconf->tx_mbufs[port];
+ len = txmb->len;
+
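+ /*
+ * Number of mbufs currently queued in the ring; the unsigned
+ * subtraction is corrected when head has wrapped behind tail.
+ */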
+ if ((int32_t)(fill = txmb->head - txmb->tail) < 0)
+ fill += len;
+
+ if (fill >= thresh) {
+ n = RTE_MIN(len - txmb->tail, fill);
+
+ k = rte_eth_tx_burst(port, qconf->tx_queue_id[port],
+ txmb->m_table + txmb->tail, (uint16_t)n);
+
+ TX_LCORE_STAT_UPDATE(&qconf->tx_stat, call, 1);
+ TX_LCORE_STAT_UPDATE(&qconf->tx_stat, send, k);
+
+ fill -= k;
+ if ((txmb->tail += k) == len)
+ txmb->tail = 0;
+ }
+
+ return fill;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint32_t fill, lcore_id, len;
+ struct lcore_queue_conf *qconf;
+ struct mbuf_table *txmb;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ txmb = qconf->tx_mbufs[port];
+ len = txmb->len;
+
+ fill = send_burst(qconf, MAX_PKT_BURST, port);
+
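+ /*
+ * The ring keeps one slot free: if it is full after the drain above,
+ * drop the oldest queued packet to make room for the new one.
+ */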
+ if (fill == len - 1) {
+ TX_LCORE_STAT_UPDATE(&qconf->tx_stat, drop, 1);
+ rte_pktmbuf_free(txmb->m_table[txmb->tail]);
+ if (++txmb->tail == len)
+ txmb->tail = 0;
+ }
+
+ TX_LCORE_STAT_UPDATE(&qconf->tx_stat, queue, 1);
+ txmb->m_table[txmb->head] = m;
+ if (++txmb->head == len)
+ txmb->head = 0;
+
+ return 0;
+}
+
+static inline void
+reassemble(struct rte_mbuf *m, uint8_t portid, uint32_t queue,
+ struct lcore_queue_conf *qconf, uint64_t tms)
+{
+ struct ether_hdr *eth_hdr;
+ struct rte_ip_frag_tbl *tbl;
+ struct rte_ip_frag_death_row *dr;
+ struct rx_queue *rxq;
+ void *d_addr_bytes;
+ uint32_t next_hop_ipv4;
+ uint8_t next_hop_ipv6, dst_port;
+
+ rxq = &qconf->rx_queue_list[queue];
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ dst_port = portid;
+
+ /* if packet is IPv4 */
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ struct ipv4_hdr *ip_hdr;
+ uint32_t ip_dst;
+
+ ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+
+ /* if it is a fragmented packet, then try to reassemble. */
+ if (rte_ipv4_frag_pkt_is_fragmented(ip_hdr)) {
+ struct rte_mbuf *mo;
+
+ tbl = rxq->frag_tbl;
+ dr = &qconf->death_row;
+
+ /* prepare mbuf: setup l2_len/l3_len. */
+ m->l2_len = sizeof(*eth_hdr);
+ m->l3_len = sizeof(*ip_hdr);
+
+ /* process this fragment. */
+ mo = rte_ipv4_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr);
+ if (mo == NULL)
+ /* no packet to send out. */
+ return;
+
+ /* we have our packet reassembled. */
+ if (mo != m) {
+ m = mo;
+ eth_hdr = rte_pktmbuf_mtod(m,
+ struct ether_hdr *);
+ ip_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ }
+ }
+ ip_dst = rte_be_to_cpu_32(ip_hdr->dst_addr);
+
+ /* Find destination port */
+ if (rte_lpm_lookup(rxq->lpm, ip_dst, &next_hop_ipv4) == 0 &&
+ (enabled_port_mask & 1 << next_hop_ipv4) != 0) {
+ dst_port = next_hop_ipv4;
+ }
+
+ eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ /* if packet is IPv6 */
+ struct ipv6_extension_fragment *frag_hdr;
+ struct ipv6_hdr *ip_hdr;
+
+ ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+
+ frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(ip_hdr);
+
+ if (frag_hdr != NULL) {
+ struct rte_mbuf *mo;
+
+ tbl = rxq->frag_tbl;
+ dr = &qconf->death_row;
+
+ /* prepare mbuf: setup l2_len/l3_len. */
+ m->l2_len = sizeof(*eth_hdr);
+ m->l3_len = sizeof(*ip_hdr) + sizeof(*frag_hdr);
+
+ mo = rte_ipv6_frag_reassemble_packet(tbl, dr, m, tms, ip_hdr, frag_hdr);
+ if (mo == NULL)
+ return;
+
+ if (mo != m) {
+ m = mo;
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ ip_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+ }
+ }
+
+ /* Find destination port */
+ if (rte_lpm6_lookup(rxq->lpm6, ip_hdr->dst_addr, &next_hop_ipv6) == 0 &&
+ (enabled_port_mask & 1 << next_hop_ipv6) != 0) {
+ dst_port = next_hop_ipv6;
+ }
+
+ eth_hdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv6);
+ }
+ /* if packet wasn't IPv4 or IPv6, it's forwarded to the port it came from */
+
+ /* 02:00:00:00:00:xx */
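+ /* The 64-bit store below assumes a little-endian target: byte 0 of the
+ * destination MAC becomes 0x02 and dst_port lands in byte 5; the two
+ * spill-over bytes are overwritten by ether_addr_copy() just below. */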
+ d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)d_addr_bytes) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+}
+
+/* main processing loop */
+static int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t diff_tsc, cur_tsc, prev_tsc;
+ int i, j, nb_rx;
+ uint8_t portid;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, IP_RSMBL, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, IP_RSMBL, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].portid;
+ RTE_LOG(INFO, IP_RSMBL, " -- lcoreid=%u portid=%hhu\n", lcore_id,
+ portid);
+ }
+
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ /*
+ * This could be optimized (use queueid instead of
+ * portid), but it is not called so often
+ */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if ((enabled_port_mask & (1 << portid)) != 0)
+ send_burst(qconf, 1, portid);
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+
+ portid = qconf->rx_queue_list[i].portid;
+
+ nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
+ MAX_PKT_BURST);
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_burst[j], void *));
+ }
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ reassemble(pkts_burst[j], portid,
+ i, qconf, cur_tsc);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++) {
+ reassemble(pkts_burst[j], portid,
+ i, qconf, cur_tsc);
+ }
+
+ rte_ip_frag_free_death_row(&qconf->death_row,
+ PREFETCH_OFFSET);
+ }
+ }
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]"
+ " [--max-pkt-len PKTLEN]"
+ " [--maxflows=<flows>] [--flowttl=<ttl>[(s|ms)]]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of RX queues per lcore\n"
+ " --maxflows=<flows>: optional, maximum number of flows "
+ "supported\n"
+ " --flowttl=<ttl>[(s|ms)]: optional, maximum TTL for each "
+ "flow\n",
+ prgname);
+}
+
+static int
+parse_flow_num(const char *str, uint32_t min, uint32_t max, uint32_t *val)
+{
+ char *end;
+ uint64_t v;
+
+ /* parse decimal string */
+ errno = 0;
+ v = strtoul(str, &end, 10);
+ if (errno != 0 || *end != '\0')
+ return -EINVAL;
+
+ if (v < min || v > max)
+ return -EINVAL;
+
+ *val = (uint32_t)v;
+ return 0;
+}
+
+static int
+parse_flow_ttl(const char *str, uint32_t min, uint32_t max, uint32_t *val)
+{
+ char *end;
+ uint64_t v;
+
+ static const char frmt_sec[] = "s";
+ static const char frmt_msec[] = "ms";
+
+ /* parse decimal string */
+ errno = 0;
+ v = strtoul(str, &end, 10);
+ if (errno != 0)
+ return -EINVAL;
+
+ if (*end != '\0') {
+ if (strncmp(frmt_sec, end, sizeof(frmt_sec)) == 0)
+ v *= MS_PER_S;
+ else if (strncmp(frmt_msec, end, sizeof (frmt_msec)) != 0)
+ return -EINVAL;
+ }
+
+ if (v < min || v > max)
+ return -EINVAL;
+
+ *val = (uint32_t)v;
+ return 0;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ printf("%p\n", q_arg);
+
+ /* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n == 0)
+ return -1;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"max-pkt-len", 1, 0, 0},
+ {"maxflows", 1, 0, 0},
+ {"flowttl", 1, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ rx_queue_per_lcore = parse_nqueue(optarg);
+ if (rx_queue_per_lcore < 0) {
+ printf("invalid queue number\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ if (!strncmp(lgopts[option_index].name,
+ "maxflows", 8)) {
+ if ((ret = parse_flow_num(optarg, MIN_FLOW_NUM,
+ MAX_FLOW_NUM,
+ &max_flow_num)) != 0) {
+ printf("invalid value: \"%s\" for "
+ "parameter %s\n",
+ optarg,
+ lgopts[option_index].name);
+ print_usage(prgname);
+ return ret;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name, "flowttl", 7)) {
+ if ((ret = parse_flow_ttl(optarg, MIN_FLOW_TTL,
+ MAX_FLOW_TTL,
+ &max_flow_ttl)) != 0) {
+ printf("invalid value: \"%s\" for "
+ "parameter %s\n",
+ optarg,
+ lgopts[option_index].name);
+ print_usage(prgname);
+ return ret;
+ }
+ }
+
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+/* Check the link status of all ports for up to 9 seconds, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("\ndone\n");
+ }
+ }
+}
+
+static int
+init_routing_table(void)
+{
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ int socket, ret;
+ unsigned i;
+
+ for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) {
+ if (socket_lpm[socket]) {
+ lpm = socket_lpm[socket];
+ /* populate the LPM table */
+ for (i = 0; i < RTE_DIM(l3fwd_ipv4_route_array); i++) {
+ ret = rte_lpm_add(lpm,
+ l3fwd_ipv4_route_array[i].ip,
+ l3fwd_ipv4_route_array[i].depth,
+ l3fwd_ipv4_route_array[i].if_out);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd "
+ "LPM table\n", i);
+ return -1;
+ }
+
+ RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv4_BYTES_FMT
+ "/%d (port %d)\n",
+ socket,
+ IPv4_BYTES(l3fwd_ipv4_route_array[i].ip),
+ l3fwd_ipv4_route_array[i].depth,
+ l3fwd_ipv4_route_array[i].if_out);
+ }
+ }
+
+ if (socket_lpm6[socket]) {
+ lpm6 = socket_lpm6[socket];
+ /* populate the LPM6 table */
+ for (i = 0; i < RTE_DIM(l3fwd_ipv6_route_array); i++) {
+ ret = rte_lpm6_add(lpm6,
+ l3fwd_ipv6_route_array[i].ip,
+ l3fwd_ipv6_route_array[i].depth,
+ l3fwd_ipv6_route_array[i].if_out);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, IP_RSMBL, "Unable to add entry %i to the l3fwd "
+ "LPM6 table\n", i);
+ return -1;
+ }
+
+ RTE_LOG(INFO, IP_RSMBL, "Socket %i: adding route " IPv6_BYTES_FMT
+ "/%d (port %d)\n",
+ socket,
+ IPv6_BYTES(l3fwd_ipv6_route_array[i].ip),
+ l3fwd_ipv6_route_array[i].depth,
+ l3fwd_ipv6_route_array[i].if_out);
+ }
+ }
+ }
+ return 0;
+}
+
+static int
+setup_port_tbl(struct lcore_queue_conf *qconf, uint32_t lcore, int socket,
+ uint32_t port)
+{
+ struct mbuf_table *mtb;
+ uint32_t n;
+ size_t sz;
+
+ n = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST);
+ sz = sizeof (*mtb) + sizeof (mtb->m_table[0]) * n;
+
+ if ((mtb = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE,
+ socket)) == NULL) {
+ RTE_LOG(ERR, IP_RSMBL, "%s() for lcore: %u, port: %u "
+ "failed to allocate %zu bytes\n",
+ __func__, lcore, port, sz);
+ return -1;
+ }
+
+ mtb->len = n;
+ qconf->tx_mbufs[port] = mtb;
+
+ return 0;
+}
+
+static int
+setup_queue_tbl(struct rx_queue *rxq, uint32_t lcore, uint32_t queue)
+{
+ int socket;
+ uint32_t nb_mbuf;
+ uint64_t frag_cycles;
+ char buf[RTE_MEMPOOL_NAMESIZE];
+
+ socket = rte_lcore_to_socket_id(lcore);
+ if (socket == SOCKET_ID_ANY)
+ socket = 0;
+
+ frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S *
+ max_flow_ttl;
+
+ if ((rxq->frag_tbl = rte_ip_frag_table_create(max_flow_num,
+ IP_FRAG_TBL_BUCKET_ENTRIES, max_flow_num, frag_cycles,
+ socket)) == NULL) {
+ RTE_LOG(ERR, IP_RSMBL, "ip_frag_tbl_create(%u) on "
+ "lcore: %u for queue: %u failed\n",
+ max_flow_num, lcore, queue);
+ return -1;
+ }
+
+ /*
+ * At any given moment up to <max_flow_num * (MAX_FRAG_NUM)>
+ * mbufs could be stored in the fragment table.
+ * Plus, each TX queue can hold up to <max_flow_num> packets.
+ */
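+ /*
+ * For example, assuming the default configuration (max_flow_num = 4096,
+ * RTE_LIBRTE_IP_FRAG_MAX_FRAG = 4, 2 KB mbuf data room, 9728-byte
+ * maximum frame), the computation below works out to roughly
+ * 4096 * 4 * 5 * 2 + 640 = 164480 mbufs for this RX queue.
+ */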
+
+ nb_mbuf = RTE_MAX(max_flow_num, 2UL * MAX_PKT_BURST) * MAX_FRAG_NUM;
+ nb_mbuf *= (port_conf.rxmode.max_rx_pkt_len + BUF_SIZE - 1) / BUF_SIZE;
+ nb_mbuf *= 2; /* ipv4 and ipv6 */
+ nb_mbuf += RTE_TEST_RX_DESC_DEFAULT + RTE_TEST_TX_DESC_DEFAULT;
+
+ nb_mbuf = RTE_MAX(nb_mbuf, (uint32_t)NB_MBUF);
+
+ snprintf(buf, sizeof(buf), "mbuf_pool_%u_%u", lcore, queue);
+
+ if ((rxq->pool = rte_mempool_create(buf, nb_mbuf, MBUF_SIZE, 0,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL, rte_pktmbuf_init, NULL,
+ socket, MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET)) == NULL) {
+ RTE_LOG(ERR, IP_RSMBL, "mempool_create(%s) failed", buf);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+init_mem(void)
+{
+ char buf[PATH_MAX];
+ struct rte_lpm *lpm;
+ struct rte_lpm6 *lpm6;
+ struct rte_lpm_config lpm_config;
+ int socket;
+ unsigned lcore_id;
+
+ /* traverse through lcores and initialize structures on each socket */
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ socket = rte_lcore_to_socket_id(lcore_id);
+
+ if (socket == SOCKET_ID_ANY)
+ socket = 0;
+
+ if (socket_lpm[socket] == NULL) {
+ RTE_LOG(INFO, IP_RSMBL, "Creating LPM table on socket %i\n", socket);
+ snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket);
+
+ lpm_config.max_rules = LPM_MAX_RULES;
+ lpm_config.number_tbl8s = 256;
+ lpm_config.flags = 0;
+
+ lpm = rte_lpm_create(buf, socket, &lpm_config);
+ if (lpm == NULL) {
+ RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n");
+ return -1;
+ }
+ socket_lpm[socket] = lpm;
+ }
+
+ if (socket_lpm6[socket] == NULL) {
+ RTE_LOG(INFO, IP_RSMBL, "Creating LPM6 table on socket %i\n", socket);
+ snprintf(buf, sizeof(buf), "IP_RSMBL_LPM_%i", socket);
+
+ lpm6 = rte_lpm6_create("IP_RSMBL_LPM6", socket, &lpm6_config);
+ if (lpm6 == NULL) {
+ RTE_LOG(ERR, IP_RSMBL, "Cannot create LPM table\n");
+ return -1;
+ }
+ socket_lpm6[socket] = lpm6;
+ }
+ }
+
+ return 0;
+}
+
+static void
+queue_dump_stat(void)
+{
+ uint32_t i, lcore;
+ const struct lcore_queue_conf *qconf;
+
+ for (lcore = 0; lcore < RTE_MAX_LCORE; lcore++) {
+ if (rte_lcore_is_enabled(lcore) == 0)
+ continue;
+
+ qconf = &lcore_queue_conf[lcore];
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ fprintf(stdout, " -- lcoreid=%u portid=%hhu "
+ "frag tbl stat:\n",
+ lcore, qconf->rx_queue_list[i].portid);
+ rte_ip_frag_table_statistics_dump(stdout,
+ qconf->rx_queue_list[i].frag_tbl);
+ fprintf(stdout, "TX bursts:\t%" PRIu64 "\n"
+ "TX packets _queued:\t%" PRIu64 "\n"
+ "TX packets dropped:\t%" PRIu64 "\n"
+ "TX packets send:\t%" PRIu64 "\n",
+ qconf->tx_stat.call,
+ qconf->tx_stat.queue,
+ qconf->tx_stat.drop,
+ qconf->tx_stat.send);
+ }
+ }
+}
+
+static void
+signal_handler(int signum)
+{
+ queue_dump_stat();
+ if (signum != SIGUSR1)
+ rte_exit(0, "received signal: %d, exiting\n", signum);
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ struct rx_queue *rxq;
+ int ret, socket;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id = 0, rx_lcore_id = 0;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid IP reassembly parameters\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+ else if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No ports found!\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize structures (mempools, lpm etc.) */
+ if (init_mem() < 0)
+ rte_panic("Cannot initialize memory structures!\n");
+
+ /* check if portmask has non-existent ports */
+ if (enabled_port_mask & ~(RTE_LEN2MASK(nb_ports, unsigned)))
+ rte_exit(EXIT_FAILURE, "Non-existent ports in portmask!\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
+
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+
+ qconf = &lcore_queue_conf[rx_lcore_id];
+ }
+
+ socket = rte_lcore_to_socket_id(portid);
+ if (socket == SOCKET_ID_ANY)
+ socket = 0;
+
+ queueid = qconf->n_rx_queue;
+ rxq = &qconf->rx_queue_list[queueid];
+ rxq->portid = portid;
+ rxq->lpm = socket_lpm[socket];
+ rxq->lpm6 = socket_lpm6[socket];
+ if (setup_queue_tbl(rxq, rx_lcore_id, queueid) < 0)
+ rte_exit(EXIT_FAILURE, "Failed to set up queue table\n");
+ qconf->n_rx_queue++;
+
+ /* init port */
+ printf("Initializing port %d ... ", portid );
+ fflush(stdout);
+
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
+ &port_conf);
+ if (ret < 0) {
+ printf("\n");
+ rte_exit(EXIT_FAILURE, "Cannot configure device: "
+ "err=%d, port=%d\n",
+ ret, portid);
+ }
+
+ /* init one RX queue */
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ socket, NULL,
+ rxq->pool);
+ if (ret < 0) {
+ printf("\n");
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
+ "err=%d, port=%d\n",
+ ret, portid);
+ }
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf("\n");
+
+ /* init one TX queue per (lcore, port) pair */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ socket = (int) rte_lcore_to_socket_id(lcore_id);
+
+ printf("txq=%u,%d,%d ", lcore_id, queueid, socket);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socket, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_queue_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ setup_port_tbl(qconf, lcore_id, socket, portid);
+ queueid++;
+ }
+ printf("\n");
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ continue;
+ }
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ if (init_routing_table() < 0)
+ rte_exit(EXIT_FAILURE, "Cannot init routing table\n");
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ signal(SIGUSR1, signal_handler);
+ signal(SIGTERM, signal_handler);
+ signal(SIGINT, signal_handler);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ipsec-secgw/Makefile b/examples/ipsec-secgw/Makefile
new file mode 100644
index 00000000..f9b59c22
--- /dev/null
+++ b/examples/ipsec-secgw/Makefile
@@ -0,0 +1,62 @@
+# BSD LICENSE
+#
+# Copyright(c) 2016 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+ $(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+APP = ipsec-secgw
+
+CFLAGS += -O3 -gdwarf-2
+CFLAGS += $(WERROR_FLAGS)
+ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+CFLAGS_sa.o += -diag-disable=vec
+endif
+
+
+VPATH += $(SRCDIR)/librte_ipsec
+
+#
+# all source are stored in SRCS-y
+#
+SRCS-y += ipsec.c
+SRCS-y += esp.c
+SRCS-y += sp.c
+SRCS-y += sa.c
+SRCS-y += rt.c
+SRCS-y += ipsec-secgw.c
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ipsec-secgw/esp.c b/examples/ipsec-secgw/esp.c
new file mode 100644
index 00000000..19273807
--- /dev/null
+++ b/examples/ipsec-secgw/esp.c
@@ -0,0 +1,251 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_memcpy.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+#include <rte_random.h>
+
+#include "ipsec.h"
+#include "esp.h"
+#include "ipip.h"
+
+#define IP_ESP_HDR_SZ (sizeof(struct ip) + sizeof(struct esp_hdr))
+
+static inline void
+random_iv_u64(uint64_t *buf, uint16_t n)
+{
+ unsigned left = n & 0x7;
+ unsigned i;
+
+ IPSEC_ASSERT((n & 0x3) == 0);
+
+ for (i = 0; i < (n >> 3); i++)
+ buf[i] = rte_rand();
+
+ if (left)
+ *((uint32_t *)&buf[i]) = (uint32_t)lrand48();
+}
+
+/* IPv4 Tunnel */
+int
+esp4_tunnel_inbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop)
+{
+ int32_t payload_len;
+ struct rte_crypto_sym_op *sym_cop;
+
+ IPSEC_ASSERT(m != NULL);
+ IPSEC_ASSERT(sa != NULL);
+ IPSEC_ASSERT(cop != NULL);
+
+ payload_len = rte_pktmbuf_pkt_len(m) - IP_ESP_HDR_SZ - sa->iv_len -
+ sa->digest_len;
+
+ if ((payload_len & (sa->block_size - 1)) || (payload_len <= 0)) {
+ IPSEC_LOG(DEBUG, IPSEC_ESP, "payload %d not multiple of %u\n",
+ payload_len, sa->block_size);
+ return -EINVAL;
+ }
+
+ sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
+
+ sym_cop->m_src = m;
+ sym_cop->cipher.data.offset = IP_ESP_HDR_SZ + sa->iv_len;
+ sym_cop->cipher.data.length = payload_len;
+
+ sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(m, void*,
+ IP_ESP_HDR_SZ);
+ sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m,
+ IP_ESP_HDR_SZ);
+ sym_cop->cipher.iv.length = sa->iv_len;
+
+ sym_cop->auth.data.offset = sizeof(struct ip);
+ if (sa->auth_algo == RTE_CRYPTO_AUTH_AES_GCM)
+ sym_cop->auth.data.length = sizeof(struct esp_hdr);
+ else
+ sym_cop->auth.data.length = sizeof(struct esp_hdr) +
+ sa->iv_len + payload_len;
+
+ sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, void*,
+ rte_pktmbuf_pkt_len(m) - sa->digest_len);
+ sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m,
+ rte_pktmbuf_pkt_len(m) - sa->digest_len);
+ sym_cop->auth.digest.length = sa->digest_len;
+
+ return 0;
+}
+
+int
+esp4_tunnel_inbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop)
+{
+ uint8_t *nexthdr, *pad_len;
+ uint8_t *padding;
+ uint16_t i;
+
+ IPSEC_ASSERT(m != NULL);
+ IPSEC_ASSERT(sa != NULL);
+ IPSEC_ASSERT(cop != NULL);
+
+ if (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
+ IPSEC_LOG(ERR, IPSEC_ESP, "Failed crypto op\n");
+ return -1;
+ }
+
+ nexthdr = rte_pktmbuf_mtod_offset(m, uint8_t*,
+ rte_pktmbuf_pkt_len(m) - sa->digest_len - 1);
+ pad_len = nexthdr - 1;
+
+ padding = pad_len - *pad_len;
+ for (i = 0; i < *pad_len; i++) {
+ if (padding[i] != i) {
+ IPSEC_LOG(ERR, IPSEC_ESP, "invalid pad_len field\n");
+ return -EINVAL;
+ }
+ }
+
+ if (rte_pktmbuf_trim(m, *pad_len + 2 + sa->digest_len)) {
+ IPSEC_LOG(ERR, IPSEC_ESP,
+ "failed to remove pad_len + digest\n");
+ return -EINVAL;
+ }
+
+ return ip4ip_inbound(m, sizeof(struct esp_hdr) + sa->iv_len);
+}
+
+int
+esp4_tunnel_outbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop)
+{
+ uint16_t pad_payload_len, pad_len;
+ struct ip *ip;
+ struct esp_hdr *esp;
+ int i;
+ char *padding;
+ struct rte_crypto_sym_op *sym_cop;
+
+ IPSEC_ASSERT(m != NULL);
+ IPSEC_ASSERT(sa != NULL);
+ IPSEC_ASSERT(cop != NULL);
+
+ /* Payload length */
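+ /* Pad the payload up to the cipher block size; the extra 2 bytes leave
+ * room for the ESP trailer's Pad Length and Next Header fields (RFC 4303). */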
+ pad_payload_len = RTE_ALIGN_CEIL(rte_pktmbuf_pkt_len(m) + 2,
+ sa->block_size);
+ pad_len = pad_payload_len - rte_pktmbuf_pkt_len(m);
+
+ rte_prefetch0(rte_pktmbuf_mtod_offset(m, void *,
+ rte_pktmbuf_pkt_len(m)));
+
+ /* Check maximum packet size */
+ if (unlikely(IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len +
+ sa->digest_len > IP_MAXPACKET)) {
+ IPSEC_LOG(DEBUG, IPSEC_ESP, "ipsec packet is too big\n");
+ return -EINVAL;
+ }
+
+ padding = rte_pktmbuf_append(m, pad_len + sa->digest_len);
+
+ IPSEC_ASSERT(padding != NULL);
+
+ ip = ip4ip_outbound(m, sizeof(struct esp_hdr) + sa->iv_len,
+ sa->src, sa->dst);
+
+ esp = (struct esp_hdr *)(ip + 1);
+ esp->spi = sa->spi;
+ esp->seq = htonl(sa->seq++);
+
+ IPSEC_LOG(DEBUG, IPSEC_ESP, "pktlen %u\n", rte_pktmbuf_pkt_len(m));
+
+ /* Fill the padding bytes using the default sequential scheme (1, 2, 3, ...) */
+ for (i = 0; i < pad_len - 2; i++)
+ padding[i] = i + 1;
+
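+ /* The last two appended bytes are the ESP trailer's Pad Length and
+ * Next Header (IPPROTO_IPIP for the tunnelled IPv4 payload) fields. */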
+ padding[pad_len - 2] = pad_len - 2;
+ padding[pad_len - 1] = IPPROTO_IPIP;
+
+ sym_cop = (struct rte_crypto_sym_op *)(cop + 1);
+
+ sym_cop->m_src = m;
+ sym_cop->cipher.data.offset = IP_ESP_HDR_SZ + sa->iv_len;
+ sym_cop->cipher.data.length = pad_payload_len;
+
+ sym_cop->cipher.iv.data = rte_pktmbuf_mtod_offset(m, uint8_t *,
+ IP_ESP_HDR_SZ);
+ sym_cop->cipher.iv.phys_addr = rte_pktmbuf_mtophys_offset(m,
+ IP_ESP_HDR_SZ);
+ sym_cop->cipher.iv.length = sa->iv_len;
+
+ sym_cop->auth.data.offset = sizeof(struct ip);
+ sym_cop->auth.data.length = sizeof(struct esp_hdr) + sa->iv_len +
+ pad_payload_len;
+
+ sym_cop->auth.digest.data = rte_pktmbuf_mtod_offset(m, uint8_t *,
+ IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len);
+ sym_cop->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m,
+ IP_ESP_HDR_SZ + sa->iv_len + pad_payload_len);
+ sym_cop->auth.digest.length = sa->digest_len;
+
+ if (sa->cipher_algo == RTE_CRYPTO_CIPHER_AES_CBC)
+ random_iv_u64((uint64_t *)sym_cop->cipher.iv.data,
+ sym_cop->cipher.iv.length);
+
+ return 0;
+}
+
+int
+esp4_tunnel_outbound_post_crypto(struct rte_mbuf *m __rte_unused,
+ struct ipsec_sa *sa __rte_unused,
+ struct rte_crypto_op *cop)
+{
+ IPSEC_ASSERT(m != NULL);
+ IPSEC_ASSERT(sa != NULL);
+ IPSEC_ASSERT(cop != NULL);
+
+ if (cop->status != RTE_CRYPTO_OP_STATUS_SUCCESS) {
+ IPSEC_LOG(ERR, IPSEC_ESP, "Failed crypto op\n");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ipsec-secgw/esp.h b/examples/ipsec-secgw/esp.h
new file mode 100644
index 00000000..31018823
--- /dev/null
+++ b/examples/ipsec-secgw/esp.h
@@ -0,0 +1,66 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __RTE_IPSEC_XFORM_ESP_H__
+#define __RTE_IPSEC_XFORM_ESP_H__
+
+struct mbuf;
+
+/* RFC4303 */
+struct esp_hdr {
+ uint32_t spi;
+ uint32_t seq;
+ /* Payload */
+ /* Padding */
+ /* Pad Length */
+ /* Next Header */
+ /* Integrity Check Value - ICV */
+};
+
+/* IPv4 Tunnel */
+int
+esp4_tunnel_inbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop);
+
+int
+esp4_tunnel_inbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop);
+
+int
+esp4_tunnel_outbound_pre_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop);
+
+int
+esp4_tunnel_outbound_post_crypto(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop);
+
+#endif /* __RTE_IPSEC_XFORM_ESP_H__ */
diff --git a/examples/ipsec-secgw/ipip.h b/examples/ipsec-secgw/ipip.h
new file mode 100644
index 00000000..322076ce
--- /dev/null
+++ b/examples/ipsec-secgw/ipip.h
@@ -0,0 +1,103 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPIP_H__
+#define __IPIP_H__
+
+#include <stdint.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <rte_mbuf.h>
+
+#define IPV6_VERSION (6)
+
+static inline struct ip *
+ip4ip_outbound(struct rte_mbuf *m, uint32_t offset, uint32_t src, uint32_t dst)
+{
+ struct ip *inip, *outip;
+
+ inip = rte_pktmbuf_mtod(m, struct ip*);
+
+ IPSEC_ASSERT(inip->ip_v == IPVERSION || inip->ip_v == IPV6_VERSION);
+
+ offset += sizeof(struct ip);
+
+ outip = (struct ip *)rte_pktmbuf_prepend(m, offset);
+
+ IPSEC_ASSERT(outip != NULL);
+
+ /* Per RFC4301 5.1.2.1 */
+ outip->ip_v = IPVERSION;
+ outip->ip_hl = 5;
+ outip->ip_tos = inip->ip_tos;
+ outip->ip_len = htons(rte_pktmbuf_data_len(m));
+
+ outip->ip_id = 0;
+ outip->ip_off = 0;
+
+ outip->ip_ttl = IPDEFTTL;
+ outip->ip_p = IPPROTO_ESP;
+
+ outip->ip_src.s_addr = src;
+ outip->ip_dst.s_addr = dst;
+
+ return outip;
+}
+
+static inline int
+ip4ip_inbound(struct rte_mbuf *m, uint32_t offset)
+{
+ struct ip *inip;
+ struct ip *outip;
+
+ outip = rte_pktmbuf_mtod(m, struct ip*);
+
+ IPSEC_ASSERT(outip->ip_v == IPVERSION);
+
+ offset += sizeof(struct ip);
+ inip = (struct ip *)rte_pktmbuf_adj(m, offset);
+ IPSEC_ASSERT(inip->ip_v == IPVERSION || inip->ip_v == IPV6_VERSION);
+
+ /* Check that the packet is still bigger than the inner IP header */
+ IPSEC_ASSERT(rte_pktmbuf_pkt_len(m) > sizeof(struct ip));
+
+ /* RFC4301 5.1.2.1 Note 6 */
+ if ((inip->ip_tos & htons(IPTOS_ECN_ECT0 | IPTOS_ECN_ECT1)) &&
+ ((outip->ip_tos & htons(IPTOS_ECN_CE)) == IPTOS_ECN_CE))
+ inip->ip_tos |= htons(IPTOS_ECN_CE);
+
+ return 0;
+}
+
+#endif /* __IPIP_H__ */
diff --git a/examples/ipsec-secgw/ipsec-secgw.c b/examples/ipsec-secgw/ipsec-secgw.c
new file mode 100644
index 00000000..00ab2d84
--- /dev/null
+++ b/examples/ipsec-secgw/ipsec-secgw.c
@@ -0,0 +1,1362 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_acl.h>
+#include <rte_lpm.h>
+#include <rte_hash.h>
+#include <rte_jhash.h>
+#include <rte_cryptodev.h>
+
+#include "ipsec.h"
+
+#define RTE_LOGTYPE_IPSEC RTE_LOGTYPE_USER1
+
+#define MAX_JUMBO_PKT_LEN 9600
+
+#define MEMPOOL_CACHE_SIZE 256
+
+#define NB_MBUF (32000)
+
+#define CDEV_MAP_ENTRIES 1024
+#define CDEV_MP_NB_OBJS 2048
+#define CDEV_MP_CACHE_SZ 64
+#define MAX_QUEUE_PAIRS 1
+
+#define OPTION_CONFIG "config"
+#define OPTION_SINGLE_SA "single-sa"
+#define OPTION_EP0 "ep0"
+#define OPTION_EP1 "ep1"
+
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define NB_SOCKETS 4
+
+/* Configure how many packets ahead to prefetch when reading packets */
+#define PREFETCH_OFFSET 3
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+
+#define MAX_LCORE_PARAMS 1024
+
+#define UNPROTECTED_PORT(port) (unprotected_port_mask & (1 << (port)))
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define IPSEC_SECGW_RX_DESC_DEFAULT 128
+#define IPSEC_SECGW_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = IPSEC_SECGW_RX_DESC_DEFAULT;
+static uint16_t nb_txd = IPSEC_SECGW_TX_DESC_DEFAULT;
+
+#if RTE_BYTE_ORDER != RTE_LITTLE_ENDIAN
+#define __BYTES_TO_UINT64(a, b, c, d, e, f, g, h) \
+ (((uint64_t)((a) & 0xff) << 56) | \
+ ((uint64_t)((b) & 0xff) << 48) | \
+ ((uint64_t)((c) & 0xff) << 40) | \
+ ((uint64_t)((d) & 0xff) << 32) | \
+ ((uint64_t)((e) & 0xff) << 24) | \
+ ((uint64_t)((f) & 0xff) << 16) | \
+ ((uint64_t)((g) & 0xff) << 8) | \
+ ((uint64_t)(h) & 0xff))
+#else
+#define __BYTES_TO_UINT64(a, b, c, d, e, f, g, h) \
+ (((uint64_t)((h) & 0xff) << 56) | \
+ ((uint64_t)((g) & 0xff) << 48) | \
+ ((uint64_t)((f) & 0xff) << 40) | \
+ ((uint64_t)((e) & 0xff) << 32) | \
+ ((uint64_t)((d) & 0xff) << 24) | \
+ ((uint64_t)((c) & 0xff) << 16) | \
+ ((uint64_t)((b) & 0xff) << 8) | \
+ ((uint64_t)(a) & 0xff))
+#endif
+#define ETHADDR(a, b, c, d, e, f) (__BYTES_TO_UINT64(a, b, c, d, e, f, 0, 0))
+
+#define ETHADDR_TO_UINT64(addr) __BYTES_TO_UINT64( \
+ addr.addr_bytes[0], addr.addr_bytes[1], \
+ addr.addr_bytes[2], addr.addr_bytes[3], \
+ addr.addr_bytes[4], addr.addr_bytes[5], \
+ 0, 0)
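+/*
+ * The byte-order-dependent packing above places addr_bytes[0] in the
+ * lowest-addressed byte of the uint64_t on either endianness, so copying
+ * the first six bytes of the value (see prepare_tx_pkt()) reproduces the
+ * MAC address in wire order.
+ */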
+
+/* port/source ethernet addr and destination ethernet addr */
+struct ethaddr_info {
+ uint64_t src, dst;
+};
+
+struct ethaddr_info ethaddr_tbl[RTE_MAX_ETHPORTS] = {
+ { 0, ETHADDR(0x00, 0x16, 0x3e, 0x7e, 0x94, 0x9a) },
+ { 0, ETHADDR(0x00, 0x16, 0x3e, 0x22, 0xa1, 0xd9) },
+ { 0, ETHADDR(0x00, 0x16, 0x3e, 0x08, 0x69, 0x26) },
+ { 0, ETHADDR(0x00, 0x16, 0x3e, 0x49, 0x9e, 0xdd) }
+};
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+static uint32_t unprotected_port_mask;
+static int32_t promiscuous_on = 1;
+static int32_t numa_on = 1; /**< NUMA is enabled by default. */
+static int32_t ep = -1; /**< Endpoint configuration (0 or 1) */
+static uint32_t nb_lcores;
+static uint32_t single_sa;
+static uint32_t single_sa_idx;
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+} __rte_cache_aligned;
+
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+
+static struct lcore_params *lcore_params;
+static uint16_t nb_lcore_params;
+
+static struct rte_hash *cdev_map_in;
+static struct rte_hash *cdev_map_out;
+
+struct buffer {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST] __rte_aligned(sizeof(void *));
+};
+
+struct lcore_conf {
+ uint16_t nb_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct buffer tx_mbufs[RTE_MAX_ETHPORTS];
+ struct ipsec_ctx inbound;
+ struct ipsec_ctx outbound;
+ struct rt_ctx *rt_ctx;
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
+ ETH_RSS_TCP | ETH_RSS_SCTP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct socket_ctx socket_ctx[NB_SOCKETS];
+
+struct traffic_type {
+ const uint8_t *data[MAX_PKT_BURST * 2];
+ struct rte_mbuf *pkts[MAX_PKT_BURST * 2];
+ uint32_t res[MAX_PKT_BURST * 2];
+ uint32_t num;
+};
+
+struct ipsec_traffic {
+ struct traffic_type ipsec4;
+ struct traffic_type ipv4;
+};
+
+static inline void
+prepare_one_packet(struct rte_mbuf *pkt, struct ipsec_traffic *t)
+{
+ uint8_t *nlp;
+
+ if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ rte_pktmbuf_adj(pkt, ETHER_HDR_LEN);
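+ /* The Ethernet header is stripped here; a new one with the configured
+ * MAC addresses is prepended in prepare_tx_pkt() before transmission. */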
+ nlp = rte_pktmbuf_mtod_offset(pkt, uint8_t *,
+ offsetof(struct ip, ip_p));
+ if (*nlp == IPPROTO_ESP)
+ t->ipsec4.pkts[(t->ipsec4.num)++] = pkt;
+ else {
+ t->ipv4.data[t->ipv4.num] = nlp;
+ t->ipv4.pkts[(t->ipv4.num)++] = pkt;
+ }
+ } else {
+ /* Unknown/Unsupported type, drop the packet */
+ rte_pktmbuf_free(pkt);
+ }
+}
+
+static inline void
+prepare_traffic(struct rte_mbuf **pkts, struct ipsec_traffic *t,
+ uint16_t nb_pkts)
+{
+ int32_t i;
+
+ t->ipsec4.num = 0;
+ t->ipv4.num = 0;
+
+ for (i = 0; i < (nb_pkts - PREFETCH_OFFSET); i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts[i + PREFETCH_OFFSET],
+ void *));
+ prepare_one_packet(pkts[i], t);
+ }
+ /* Process remaining packets */
+ for (; i < nb_pkts; i++)
+ prepare_one_packet(pkts[i], t);
+}
+
+static inline void
+prepare_tx_pkt(struct rte_mbuf *pkt, uint8_t port)
+{
+ pkt->ol_flags |= PKT_TX_IP_CKSUM | PKT_TX_IPV4;
+ pkt->l3_len = sizeof(struct ip);
+ pkt->l2_len = ETHER_HDR_LEN;
+
+ struct ether_hdr *ethhdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt,
+ ETHER_HDR_LEN);
+
+ ethhdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+ memcpy(&ethhdr->s_addr, &ethaddr_tbl[port].src,
+ sizeof(struct ether_addr));
+ memcpy(&ethhdr->d_addr, &ethaddr_tbl[port].dst,
+ sizeof(struct ether_addr));
+}
+
+static inline void
+prepare_tx_burst(struct rte_mbuf *pkts[], uint16_t nb_pkts, uint8_t port)
+{
+ int32_t i;
+ const int32_t prefetch_offset = 2;
+
+ for (i = 0; i < (nb_pkts - prefetch_offset); i++) {
+ rte_prefetch0(pkts[i + prefetch_offset]->cacheline1);
+ prepare_tx_pkt(pkts[i], port);
+ }
+ /* Process remaining packets */
+ for (; i < nb_pkts; i++)
+ prepare_tx_pkt(pkts[i], port);
+}
+
+/* Send burst of packets on an output interface */
+static inline int32_t
+send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ int32_t ret;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ prepare_tx_burst(m_table, n, port);
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int32_t
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint32_t lcore_id;
+ uint16_t len;
+ struct lcore_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_conf[lcore_id];
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+static inline void
+process_pkts_inbound(struct ipsec_ctx *ipsec_ctx,
+ struct ipsec_traffic *traffic)
+{
+ struct rte_mbuf *m;
+ uint16_t idx, nb_pkts_in, i, j;
+ uint32_t sa_idx, res;
+
+ nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec4.pkts,
+ traffic->ipsec4.num, MAX_PKT_BURST);
+
+ /* Run the inbound SP/ACL check on both decrypted IPsec and plain IPv4 packets */
+ for (i = 0; i < nb_pkts_in; i++) {
+ idx = traffic->ipv4.num++;
+ m = traffic->ipsec4.pkts[i];
+ traffic->ipv4.pkts[idx] = m;
+ traffic->ipv4.data[idx] = rte_pktmbuf_mtod_offset(m,
+ uint8_t *, offsetof(struct ip, ip_p));
+ }
+
+ rte_acl_classify((struct rte_acl_ctx *)ipsec_ctx->sp_ctx,
+ traffic->ipv4.data, traffic->ipv4.res,
+ traffic->ipv4.num, DEFAULT_MAX_CATEGORIES);
+
+ j = 0;
+ for (i = 0; i < traffic->ipv4.num - nb_pkts_in; i++) {
+ m = traffic->ipv4.pkts[i];
+ res = traffic->ipv4.res[i];
+ if (res & ~BYPASS) {
+ rte_pktmbuf_free(m);
+ continue;
+ }
+ traffic->ipv4.pkts[j++] = m;
+ }
+ /* Check return SA SPI matches pkt SPI */
+ for ( ; i < traffic->ipv4.num; i++) {
+ m = traffic->ipv4.pkts[i];
+ sa_idx = traffic->ipv4.res[i] & PROTECT_MASK;
+ if (sa_idx == 0 || !inbound_sa_check(ipsec_ctx->sa_ctx,
+ m, sa_idx)) {
+ rte_pktmbuf_free(m);
+ continue;
+ }
+ traffic->ipv4.pkts[j++] = m;
+ }
+ traffic->ipv4.num = j;
+}
+
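+/* Outbound path (protected port): drop incoming IPsec packets, classify IPv4
+ * packets against the outbound SP, encapsulate those matched to an SA and
+ * pass BYPASS packets through unchanged.
+ */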
+static inline void
+process_pkts_outbound(struct ipsec_ctx *ipsec_ctx,
+ struct ipsec_traffic *traffic)
+{
+ struct rte_mbuf *m;
+ uint16_t idx, nb_pkts_out, i, j;
+ uint32_t sa_idx, res;
+
+ rte_acl_classify((struct rte_acl_ctx *)ipsec_ctx->sp_ctx,
+ traffic->ipv4.data, traffic->ipv4.res,
+ traffic->ipv4.num, DEFAULT_MAX_CATEGORIES);
+
+ /* Drop any IPsec traffic from protected ports */
+ for (i = 0; i < traffic->ipsec4.num; i++)
+ rte_pktmbuf_free(traffic->ipsec4.pkts[i]);
+
+ traffic->ipsec4.num = 0;
+
+ j = 0;
+ for (i = 0; i < traffic->ipv4.num; i++) {
+ m = traffic->ipv4.pkts[i];
+ res = traffic->ipv4.res[i];
+ sa_idx = res & PROTECT_MASK;
+ if ((res == 0) || (res & DISCARD))
+ rte_pktmbuf_free(m);
+ else if (sa_idx != 0) {
+ traffic->ipsec4.res[traffic->ipsec4.num] = sa_idx;
+ traffic->ipsec4.pkts[traffic->ipsec4.num++] = m;
+ } else /* BYPASS */
+ traffic->ipv4.pkts[j++] = m;
+ }
+ traffic->ipv4.num = j;
+
+ nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipsec4.pkts,
+ traffic->ipsec4.res, traffic->ipsec4.num,
+ MAX_PKT_BURST);
+
+ for (i = 0; i < nb_pkts_out; i++) {
+ idx = traffic->ipv4.num++;
+ m = traffic->ipsec4.pkts[i];
+ traffic->ipv4.pkts[idx] = m;
+ }
+}
+
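+/* Single-SA inbound path: plain IPv4 packets are dropped and all IPsec
+ * packets are decapsulated without an SP lookup.
+ */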
+static inline void
+process_pkts_inbound_nosp(struct ipsec_ctx *ipsec_ctx,
+ struct ipsec_traffic *traffic)
+{
+ uint16_t nb_pkts_in, i;
+
+ /* Drop any IPv4 traffic from unprotected ports */
+ for (i = 0; i < traffic->ipv4.num; i++)
+ rte_pktmbuf_free(traffic->ipv4.pkts[i]);
+
+ traffic->ipv4.num = 0;
+
+ nb_pkts_in = ipsec_inbound(ipsec_ctx, traffic->ipsec4.pkts,
+ traffic->ipsec4.num, MAX_PKT_BURST);
+
+ for (i = 0; i < nb_pkts_in; i++)
+ traffic->ipv4.pkts[i] = traffic->ipsec4.pkts[i];
+
+ traffic->ipv4.num = nb_pkts_in;
+}
+
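+/* Single-SA outbound path: IPsec packets are dropped and all IPv4 packets are
+ * encapsulated using the single configured SA index.
+ */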
+static inline void
+process_pkts_outbound_nosp(struct ipsec_ctx *ipsec_ctx,
+ struct ipsec_traffic *traffic)
+{
+ uint16_t nb_pkts_out, i;
+
+ /* Drop any IPsec traffic from protected ports */
+ for (i = 0; i < traffic->ipsec4.num; i++)
+ rte_pktmbuf_free(traffic->ipsec4.pkts[i]);
+
+ traffic->ipsec4.num = 0;
+
+ for (i = 0; i < traffic->ipv4.num; i++)
+ traffic->ipv4.res[i] = single_sa_idx;
+
+ nb_pkts_out = ipsec_outbound(ipsec_ctx, traffic->ipv4.pkts,
+ traffic->ipv4.res, traffic->ipv4.num,
+ MAX_PKT_BURST);
+
+ traffic->ipv4.num = nb_pkts_out;
+}
+
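+/* Look up each packet's destination address in the LPM routing table and
+ * queue it on the resulting output port; packets without a route are dropped.
+ */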
+static inline void
+route_pkts(struct rt_ctx *rt_ctx, struct rte_mbuf *pkts[], uint8_t nb_pkts)
+{
+ uint32_t hop[MAX_PKT_BURST * 2];
+ uint32_t dst_ip[MAX_PKT_BURST * 2];
+ uint16_t i, offset;
+
+ if (nb_pkts == 0)
+ return;
+
+ for (i = 0; i < nb_pkts; i++) {
+ offset = offsetof(struct ip, ip_dst);
+ dst_ip[i] = *rte_pktmbuf_mtod_offset(pkts[i],
+ uint32_t *, offset);
+ dst_ip[i] = rte_be_to_cpu_32(dst_ip[i]);
+ }
+
+ rte_lpm_lookup_bulk((struct rte_lpm *)rt_ctx, dst_ip, hop, nb_pkts);
+
+ for (i = 0; i < nb_pkts; i++) {
+ if ((hop[i] & RTE_LPM_LOOKUP_SUCCESS) == 0) {
+ rte_pktmbuf_free(pkts[i]);
+ continue;
+ }
+ send_single_packet(pkts[i], hop[i] & 0xff);
+ }
+}
+
+static inline void
+process_pkts(struct lcore_conf *qconf, struct rte_mbuf **pkts,
+ uint8_t nb_pkts, uint8_t portid)
+{
+ struct ipsec_traffic traffic;
+
+ prepare_traffic(pkts, &traffic, nb_pkts);
+
+ if (single_sa) {
+ if (UNPROTECTED_PORT(portid))
+ process_pkts_inbound_nosp(&qconf->inbound, &traffic);
+ else
+ process_pkts_outbound_nosp(&qconf->outbound, &traffic);
+ } else {
+ if (UNPROTECTED_PORT(portid))
+ process_pkts_inbound(&qconf->inbound, &traffic);
+ else
+ process_pkts_outbound(&qconf->outbound, &traffic);
+ }
+
+ route_pkts(qconf->rt_ctx, traffic.ipv4.pkts, traffic.ipv4.num);
+}
+
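+/* Flush any partially filled TX buffers to their ports */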
+static inline void
+drain_buffers(struct lcore_conf *qconf)
+{
+ struct buffer *buf;
+ uint32_t portid;
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ buf = &qconf->tx_mbufs[portid];
+ if (buf->len == 0)
+ continue;
+ send_burst(qconf, buf->len, portid);
+ buf->len = 0;
+ }
+}
+
+/* main processing loop */
+static int32_t
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts[MAX_PKT_BURST];
+ uint32_t lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int32_t i, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
+ int32_t socket_id;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
+ / US_PER_S * BURST_TX_DRAIN_US;
+ struct lcore_rx_queue *rxql;
+
+ prev_tsc = 0;
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+ rxql = qconf->rx_queue_list;
+ socket_id = rte_lcore_to_socket_id(lcore_id);
+
+ qconf->rt_ctx = socket_ctx[socket_id].rt_ipv4;
+ qconf->inbound.sp_ctx = socket_ctx[socket_id].sp_ipv4_in;
+ qconf->inbound.sa_ctx = socket_ctx[socket_id].sa_ipv4_in;
+ qconf->inbound.cdev_map = cdev_map_in;
+ qconf->outbound.sp_ctx = socket_ctx[socket_id].sp_ipv4_out;
+ qconf->outbound.sa_ctx = socket_ctx[socket_id].sa_ipv4_out;
+ qconf->outbound.cdev_map = cdev_map_out;
+
+ if (qconf->nb_rx_queue == 0) {
+ RTE_LOG(INFO, IPSEC, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, IPSEC, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->nb_rx_queue; i++) {
+ portid = rxql[i].port_id;
+ queueid = rxql[i].queue_id;
+ RTE_LOG(INFO, IPSEC,
+ " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ lcore_id, portid, queueid);
+ }
+
+ while (1) {
+ cur_tsc = rte_rdtsc();
+
+ /* TX queue buffer drain */
+ diff_tsc = cur_tsc - prev_tsc;
+
+ if (unlikely(diff_tsc > drain_tsc)) {
+ drain_buffers(qconf);
+ prev_tsc = cur_tsc;
+ }
+
+ /* Read packets from the RX queues */
+ for (i = 0; i < qconf->nb_rx_queue; ++i) {
+ portid = rxql[i].port_id;
+ queueid = rxql[i].queue_id;
+ nb_rx = rte_eth_rx_burst(portid, queueid,
+ pkts, MAX_PKT_BURST);
+
+ if (nb_rx > 0)
+ process_pkts(qconf, pkts, nb_rx, portid);
+ }
+ }
+}
+
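+/* Sanity check the port/queue/lcore mappings against the enabled lcores and
+ * the ports present on the board.
+ */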
+static int32_t
+check_params(void)
+{
+ uint8_t lcore, portid, nb_ports;
+ uint16_t i;
+ int32_t socket_id;
+
+ if (lcore_params == NULL) {
+ printf("Error: No port/queue/core mappings\n");
+ return -1;
+ }
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in "
+ "lcore mask\n", lcore);
+ return -1;
+ }
+ socket_id = rte_lcore_to_socket_id(lcore);
+ if (socket_id != 0 && numa_on == 0) {
+ printf("warning: lcore %hhu is on socket %d "
+ "with numa off\n",
+ lcore, socket_id);
+ }
+ portid = lcore_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
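+/* Number of RX queues to configure on a port: highest queue id referenced in
+ * the lcore mappings, plus one.
+ */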
+static uint8_t
+get_port_nb_rx_queues(const uint8_t port)
+{
+ int32_t queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port &&
+ lcore_params[i].queue_id > queue)
+ queue = lcore_params[i].queue_id;
+ }
+ return (uint8_t)(++queue);
+}
+
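+/* Assign each configured (port, queue) pair to the RX queue list of its lcore */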
+static int32_t
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].nb_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ nb_rx_queue + 1, lcore);
+ return -1;
+ }
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].nb_rx_queue++;
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK -P -u PORTMASK"
+ " --"OPTION_CONFIG" (port,queue,lcore)[,(port,queue,lcore]"
+ " --single-sa SAIDX --ep0|--ep1\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -P : enable promiscuous mode\n"
+ " -u PORTMASK: hexadecimal bitmask of unprotected ports\n"
+ " --"OPTION_CONFIG": (port,queue,lcore): "
+ "rx queues configuration\n"
+ " --single-sa SAIDX: use single SA index for outbound, "
+ "bypassing the SP\n"
+ " --ep0: Configure as Endpoint 0\n"
+ " --ep1: Configure as Endpoint 1\n", prgname);
+}
+
+static int32_t
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if ((pm == 0) && errno)
+ return -1;
+
+ return pm;
+}
+
+static int32_t
+parse_decimal(const char *str)
+{
+ char *end = NULL;
+ unsigned long num;
+
+ num = strtoul(str, &end, 10);
+ if ((str[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ return num;
+}
+
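+/* Parse the --config option: a list of (port,queue,lcore) tuples defining the
+ * RX queue to lcore mapping.
+ */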
+static int32_t
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int32_t i;
+ uint32_t size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p0, '(')) != NULL) {
+ ++p;
+ p0 = strchr(p, ')');
+ if (p0 == NULL)
+ return -1;
+
+ size = p0 - p;
+ if (size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
+ _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id =
+ (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id =
+ (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+ return 0;
+}
+
+#define __STRNCMP(name, opt) (!strncmp(name, opt, sizeof(opt)))
+static int32_t
+parse_args_long_options(struct option *lgopts, int32_t option_index)
+{
+ int32_t ret = -1;
+ const char *optname = lgopts[option_index].name;
+
+ if (__STRNCMP(optname, OPTION_CONFIG)) {
+ ret = parse_config(optarg);
+ if (ret)
+ printf("invalid config\n");
+ }
+
+ if (__STRNCMP(optname, OPTION_SINGLE_SA)) {
+ ret = parse_decimal(optarg);
+ if (ret != -1) {
+ single_sa = 1;
+ single_sa_idx = ret;
+ printf("Configured with single SA index %u\n",
+ single_sa_idx);
+ ret = 0;
+ }
+ }
+
+ if (__STRNCMP(optname, OPTION_EP0)) {
+ printf("endpoint 0\n");
+ ep = 0;
+ ret = 0;
+ }
+
+ if (__STRNCMP(optname, OPTION_EP1)) {
+ printf("endpoint 1\n");
+ ep = 1;
+ ret = 0;
+ }
+
+ return ret;
+}
+#undef __STRNCMP
+
+static int32_t
+parse_args(int32_t argc, char **argv)
+{
+ int32_t opt, ret;
+ char **argvopt;
+ int32_t option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {OPTION_CONFIG, 1, 0, 0},
+ {OPTION_SINGLE_SA, 1, 0, 0},
+ {OPTION_EP0, 0, 0, 0},
+ {OPTION_EP1, 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:Pu:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'P':
+ printf("Promiscuous mode selected\n");
+ promiscuous_on = 1;
+ break;
+ case 'u':
+ unprotected_port_mask = parse_portmask(optarg);
+ if (unprotected_port_mask == 0) {
+ printf("invalid unprotected portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 0:
+ if (parse_args_long_options(lgopts, option_index)) {
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+/* Check the link status of all ports for up to 9 seconds and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (uint32_t)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after the final link status print, stop checking */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
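+/* Record that the (lcore, cipher algo, auth algo) key is served by the given
+ * cryptodev queue pair, unless a mapping for that key already exists.
+ */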
+static int32_t
+add_mapping(struct rte_hash *map, const char *str, uint16_t cdev_id,
+ uint16_t qp, struct lcore_params *params,
+ struct ipsec_ctx *ipsec_ctx,
+ const struct rte_cryptodev_capabilities *cipher,
+ const struct rte_cryptodev_capabilities *auth)
+{
+ int32_t ret = 0;
+ unsigned long i;
+ struct cdev_key key = { 0 };
+
+ key.lcore_id = params->lcore_id;
+ if (cipher)
+ key.cipher_algo = cipher->sym.cipher.algo;
+ if (auth)
+ key.auth_algo = auth->sym.auth.algo;
+
+ ret = rte_hash_lookup(map, &key);
+ if (ret != -ENOENT)
+ return 0;
+
+ for (i = 0; i < ipsec_ctx->nb_qps; i++)
+ if (ipsec_ctx->tbl[i].id == cdev_id)
+ break;
+
+ if (i == ipsec_ctx->nb_qps) {
+ if (ipsec_ctx->nb_qps == MAX_QP_PER_LCORE) {
+ printf("Maximum number of crypto devices assigned to "
+ "a core, increase MAX_QP_PER_LCORE value\n");
+ return 0;
+ }
+ ipsec_ctx->tbl[i].id = cdev_id;
+ ipsec_ctx->tbl[i].qp = qp;
+ ipsec_ctx->nb_qps++;
+ printf("%s cdev mapping: lcore %u using cdev %u qp %u "
+ "(cdev_id_qp %lu)\n", str, key.lcore_id,
+ cdev_id, qp, i);
+ }
+
+ ret = rte_hash_add_key_data(map, &key, (void *)i);
+ if (ret < 0) {
+ printf("Faled to insert cdev mapping for (lcore %u, "
+ "cdev %u, qp %u), errno %d\n",
+ key.lcore_id, ipsec_ctx->tbl[i].id,
+ ipsec_ctx->tbl[i].qp, ret);
+ return 0;
+ }
+
+ return 1;
+}
+
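+/* For a cryptodev that supports symmetric operation chaining, add a mapping
+ * for every cipher/auth capability pair to the inbound or outbound context of
+ * the lcore, depending on whether the port is unprotected or protected.
+ */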
+static int32_t
+add_cdev_mapping(struct rte_cryptodev_info *dev_info, uint16_t cdev_id,
+ uint16_t qp, struct lcore_params *params)
+{
+ int32_t ret = 0;
+ const struct rte_cryptodev_capabilities *i, *j;
+ struct rte_hash *map;
+ struct lcore_conf *qconf;
+ struct ipsec_ctx *ipsec_ctx;
+ const char *str;
+
+ qconf = &lcore_conf[params->lcore_id];
+
+ if ((unprotected_port_mask & (1 << params->port_id)) == 0) {
+ map = cdev_map_out;
+ ipsec_ctx = &qconf->outbound;
+ str = "Outbound";
+ } else {
+ map = cdev_map_in;
+ ipsec_ctx = &qconf->inbound;
+ str = "Inbound";
+ }
+
+ /* Require cryptodevs that support symmetric operation chaining */
+ if (!(dev_info->feature_flags &
+ RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING))
+ return ret;
+
+ for (i = dev_info->capabilities;
+ i->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; i++) {
+ if (i->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ continue;
+
+ if (i->sym.xform_type != RTE_CRYPTO_SYM_XFORM_CIPHER)
+ continue;
+
+ for (j = dev_info->capabilities;
+ j->op != RTE_CRYPTO_OP_TYPE_UNDEFINED; j++) {
+ if (j->op != RTE_CRYPTO_OP_TYPE_SYMMETRIC)
+ continue;
+
+ if (j->sym.xform_type != RTE_CRYPTO_SYM_XFORM_AUTH)
+ continue;
+
+ ret |= add_mapping(map, str, cdev_id, qp, params,
+ ipsec_ctx, i, j);
+ }
+ }
+
+ return ret;
+}
+
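+/* Create the cryptodev mapping hash tables, distribute cryptodev queue pairs
+ * across the configured lcores (hardware devices first) and configure every
+ * cryptodev that received at least one queue pair.
+ */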
+static int32_t
+cryptodevs_init(void)
+{
+ struct rte_cryptodev_config dev_conf;
+ struct rte_cryptodev_qp_conf qp_conf;
+ uint16_t idx, max_nb_qps, qp, i;
+ int16_t cdev_id;
+ struct rte_hash_parameters params = { 0 };
+
+ params.entries = CDEV_MAP_ENTRIES;
+ params.key_len = sizeof(struct cdev_key);
+ params.hash_func = rte_jhash;
+ params.hash_func_init_val = 0;
+ params.socket_id = rte_socket_id();
+
+ params.name = "cdev_map_in";
+ cdev_map_in = rte_hash_create(&params);
+ if (cdev_map_in == NULL)
+ rte_panic("Failed to create cdev_map hash table, errno = %d\n",
+ rte_errno);
+
+ params.name = "cdev_map_out";
+ cdev_map_out = rte_hash_create(&params);
+ if (cdev_map_out == NULL)
+ rte_panic("Failed to create cdev_map hash table, errno = %d\n",
+ rte_errno);
+
+ printf("lcore/cryptodev/qp mappings:\n");
+
+ idx = 0;
+ /* Start from last cdev id to give HW priority */
+ for (cdev_id = rte_cryptodev_count() - 1; cdev_id >= 0; cdev_id--) {
+ struct rte_cryptodev_info cdev_info;
+
+ rte_cryptodev_info_get(cdev_id, &cdev_info);
+
+ if (nb_lcore_params > cdev_info.max_nb_queue_pairs)
+ max_nb_qps = cdev_info.max_nb_queue_pairs;
+ else
+ max_nb_qps = nb_lcore_params;
+
+ qp = 0;
+ i = 0;
+ while (qp < max_nb_qps && i < nb_lcore_params) {
+ if (add_cdev_mapping(&cdev_info, cdev_id, qp,
+ &lcore_params[idx]))
+ qp++;
+ idx++;
+ idx = idx % nb_lcore_params;
+ i++;
+ }
+
+ if (qp == 0)
+ continue;
+
+ dev_conf.socket_id = rte_cryptodev_socket_id(cdev_id);
+ dev_conf.nb_queue_pairs = qp;
+ dev_conf.session_mp.nb_objs = CDEV_MP_NB_OBJS;
+ dev_conf.session_mp.cache_size = CDEV_MP_CACHE_SZ;
+
+ if (rte_cryptodev_configure(cdev_id, &dev_conf))
+ rte_panic("Failed to initialize crypodev %u\n",
+ cdev_id);
+
+ qp_conf.nb_descriptors = CDEV_MP_NB_OBJS;
+ for (qp = 0; qp < dev_conf.nb_queue_pairs; qp++)
+ if (rte_cryptodev_queue_pair_setup(cdev_id, qp,
+ &qp_conf, dev_conf.socket_id))
+ rte_panic("Failed to setup queue %u for "
+ "cdev_id %u\n", 0, cdev_id);
+ }
+
+ printf("\n");
+
+ return 0;
+}
+
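+/* Configure an Ethernet port with one TX queue per enabled lcore and the RX
+ * queues assigned to it by the lcore mappings.
+ */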
+static void
+port_init(uint8_t portid)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ uint16_t nb_tx_queue, nb_rx_queue;
+ uint16_t tx_queueid, rx_queueid, queue, lcore_id;
+ int32_t ret, socket_id;
+ struct lcore_conf *qconf;
+ struct ether_addr ethaddr;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+
+ printf("Configuring device port %u:\n", portid);
+
+ rte_eth_macaddr_get(portid, &ethaddr);
+ ethaddr_tbl[portid].src = ETHADDR_TO_UINT64(ethaddr);
+ print_ethaddr("Address: ", &ethaddr);
+ printf("\n");
+
+ nb_rx_queue = get_port_nb_rx_queues(portid);
+ nb_tx_queue = nb_lcores;
+
+ if (nb_rx_queue > dev_info.max_rx_queues)
+ rte_exit(EXIT_FAILURE, "Error: queue %u not available "
+ "(max rx queue is %u)\n",
+ nb_rx_queue, dev_info.max_rx_queues);
+
+ if (nb_tx_queue > dev_info.max_tx_queues)
+ rte_exit(EXIT_FAILURE, "Error: queue %u not available "
+ "(max tx queue is %u)\n",
+ nb_tx_queue, dev_info.max_tx_queues);
+
+ printf("Creating queues: nb_rx_queue=%d nb_tx_queue=%u...\n",
+ nb_rx_queue, nb_tx_queue);
+
+ ret = rte_eth_dev_configure(portid, nb_rx_queue, nb_tx_queue,
+ &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: "
+ "err=%d, port=%d\n", ret, portid);
+
+ /* init one TX queue per lcore */
+ tx_queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socket_id = (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socket_id = 0;
+
+ /* init TX queue */
+ printf("Setup txq=%u,%d,%d\n", lcore_id, tx_queueid, socket_id);
+
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+
+ ret = rte_eth_tx_queue_setup(portid, tx_queueid, nb_txd,
+ socket_id, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: "
+ "err=%d, port=%d\n", ret, portid);
+
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_queue_id[portid] = tx_queueid;
+ tx_queueid++;
+
+ /* init RX queues */
+ for (queue = 0; queue < qconf->nb_rx_queue; ++queue) {
+ if (portid != qconf->rx_queue_list[queue].port_id)
+ continue;
+
+ rx_queueid = qconf->rx_queue_list[queue].queue_id;
+
+ printf("Setup rxq=%d,%d,%d\n", portid, rx_queueid,
+ socket_id);
+
+ ret = rte_eth_rx_queue_setup(portid, rx_queueid,
+ nb_rxd, socket_id, NULL,
+ socket_ctx[socket_id].mbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+ }
+ }
+ printf("\n");
+}
+
+static void
+pool_init(struct socket_ctx *ctx, int32_t socket_id, uint32_t nb_mbuf)
+{
+ char s[64];
+
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socket_id);
+ ctx->mbuf_pool = rte_pktmbuf_pool_create(s, nb_mbuf,
+ MEMPOOL_CACHE_SIZE, ipsec_metadata_size(),
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ socket_id);
+ if (ctx->mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n",
+ socket_id);
+ else
+ printf("Allocated mbuf pool on socket %d\n", socket_id);
+}
+
+int32_t
+main(int32_t argc, char **argv)
+{
+ int32_t ret;
+ uint32_t lcore_id, nb_ports;
+ uint8_t portid, socket_id;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid parameters\n");
+
+ if (ep < 0)
+ rte_exit(EXIT_FAILURE, "need to choose either EP0 or EP1\n");
+
+ if ((unprotected_port_mask & enabled_port_mask) !=
+ unprotected_port_mask)
+ rte_exit(EXIT_FAILURE, "Invalid unprotected portmask 0x%x\n",
+ unprotected_port_mask);
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* Replicate each context per socket */
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socket_id = (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socket_id = 0;
+
+ if (socket_ctx[socket_id].mbuf_pool)
+ continue;
+
+ sa_init(&socket_ctx[socket_id], socket_id, ep);
+
+ sp_init(&socket_ctx[socket_id], socket_id, ep);
+
+ rt_init(&socket_ctx[socket_id], socket_id, ep);
+
+ pool_init(&socket_ctx[socket_id], socket_id, NB_MBUF);
+ }
+
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ port_init(portid);
+ }
+
+ cryptodevs_init();
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: "
+ "err=%d, port=%d\n", ret, portid);
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/ipsec-secgw/ipsec.c b/examples/ipsec-secgw/ipsec.c
new file mode 100644
index 00000000..baf30d4b
--- /dev/null
+++ b/examples/ipsec-secgw/ipsec.c
@@ -0,0 +1,203 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+#include <rte_mbuf.h>
+#include <rte_hash.h>
+
+#include "ipsec.h"
+
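+/* Create a cryptodev symmetric session for the SA on the queue pair mapped to
+ * this lcore and the SA's cipher/auth algorithms.
+ */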
+static inline int
+create_session(struct ipsec_ctx *ipsec_ctx __rte_unused, struct ipsec_sa *sa)
+{
+ uint32_t cdev_id_qp = 0;
+ int32_t ret;
+ struct cdev_key key = { 0 };
+
+ key.lcore_id = (uint8_t)rte_lcore_id();
+
+ key.cipher_algo = (uint8_t)sa->cipher_algo;
+ key.auth_algo = (uint8_t)sa->auth_algo;
+
+ ret = rte_hash_lookup_data(ipsec_ctx->cdev_map, &key,
+ (void **)&cdev_id_qp);
+ if (ret < 0) {
+ IPSEC_LOG(ERR, IPSEC, "No cryptodev: core %u, cipher_algo %u, "
+ "auth_algo %u\n", key.lcore_id, key.cipher_algo,
+ key.auth_algo);
+ return -1;
+ }
+
+ IPSEC_LOG(DEBUG, IPSEC, "Create session for SA spi %u on cryptodev "
+ "%u qp %u\n", sa->spi, ipsec_ctx->tbl[cdev_id_qp].id,
+ ipsec_ctx->tbl[cdev_id_qp].qp);
+
+ sa->crypto_session = rte_cryptodev_sym_session_create(
+ ipsec_ctx->tbl[cdev_id_qp].id, sa->xforms);
+
+ sa->cdev_id_qp = cdev_id_qp;
+
+ return 0;
+}
+
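+/* Buffer a crypto operation and enqueue a full burst to the cryptodev once
+ * MAX_PKT_BURST operations have accumulated; operations that cannot be
+ * enqueued are dropped together with their packets.
+ */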
+static inline void
+enqueue_cop(struct cdev_qp *cqp, struct rte_crypto_op *cop)
+{
+ int ret, i;
+
+ cqp->buf[cqp->len++] = cop;
+
+ if (cqp->len == MAX_PKT_BURST) {
+ ret = rte_cryptodev_enqueue_burst(cqp->id, cqp->qp,
+ cqp->buf, cqp->len);
+ if (ret < cqp->len) {
+ IPSEC_LOG(DEBUG, IPSEC, "Cryptodev %u queue %u:"
+ " enqueued %u crypto ops out of %u\n",
+ cqp->id, cqp->qp,
+ ret, cqp->len);
+ for (i = ret; i < cqp->len; i++)
+ rte_pktmbuf_free(cqp->buf[i]->sym->m_src);
+ }
+ cqp->in_flight += ret;
+ cqp->len = 0;
+ }
+}
+
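+/* Attach each packet's crypto operation to its SA session (creating the
+ * session on first use), run the SA pre-crypto handler and enqueue the
+ * operation; then poll the queue pairs, run the post-crypto handler on
+ * completed operations and return the surviving packets.
+ */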
+static inline uint16_t
+ipsec_processing(struct ipsec_ctx *ipsec_ctx, struct rte_mbuf *pkts[],
+ struct ipsec_sa *sas[], uint16_t nb_pkts, uint16_t max_pkts)
+{
+ int ret = 0, i, j, nb_cops;
+ struct ipsec_mbuf_metadata *priv;
+ struct rte_crypto_op *cops[max_pkts];
+ struct ipsec_sa *sa;
+ struct rte_mbuf *pkt;
+
+ for (i = 0; i < nb_pkts; i++) {
+ rte_prefetch0(sas[i]);
+ rte_prefetch0(pkts[i]);
+
+ priv = get_priv(pkts[i]);
+ sa = sas[i];
+ priv->sa = sa;
+
+ IPSEC_ASSERT(sa != NULL);
+
+ priv->cop.type = RTE_CRYPTO_OP_TYPE_SYMMETRIC;
+
+ rte_prefetch0(&priv->sym_cop);
+ priv->cop.sym = &priv->sym_cop;
+
+ if ((unlikely(sa->crypto_session == NULL)) &&
+ create_session(ipsec_ctx, sa)) {
+ rte_pktmbuf_free(pkts[i]);
+ continue;
+ }
+
+ rte_crypto_op_attach_sym_session(&priv->cop,
+ sa->crypto_session);
+
+ ret = sa->pre_crypto(pkts[i], sa, &priv->cop);
+ if (unlikely(ret)) {
+ rte_pktmbuf_free(pkts[i]);
+ continue;
+ }
+
+ IPSEC_ASSERT(sa->cdev_id_qp < ipsec_ctx->nb_qps);
+ enqueue_cop(&ipsec_ctx->tbl[sa->cdev_id_qp], &priv->cop);
+ }
+
+ nb_pkts = 0;
+ for (i = 0; i < ipsec_ctx->nb_qps && nb_pkts < max_pkts; i++) {
+ struct cdev_qp *cqp;
+
+ cqp = &ipsec_ctx->tbl[ipsec_ctx->last_qp++];
+ if (ipsec_ctx->last_qp == ipsec_ctx->nb_qps)
+ ipsec_ctx->last_qp %= ipsec_ctx->nb_qps;
+
+ if (cqp->in_flight == 0)
+ continue;
+
+ nb_cops = rte_cryptodev_dequeue_burst(cqp->id, cqp->qp,
+ cops, max_pkts - nb_pkts);
+
+ cqp->in_flight -= nb_cops;
+
+ for (j = 0; j < nb_cops; j++) {
+ pkt = cops[j]->sym->m_src;
+ rte_prefetch0(pkt);
+
+ priv = get_priv(pkt);
+ sa = priv->sa;
+
+ IPSEC_ASSERT(sa != NULL);
+
+ ret = sa->post_crypto(pkt, sa, cops[j]);
+ if (unlikely(ret))
+ rte_pktmbuf_free(pkt);
+ else
+ pkts[nb_pkts++] = pkt;
+ }
+ }
+
+ /* return packets */
+ return nb_pkts;
+}
+
+uint16_t
+ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t len)
+{
+ struct ipsec_sa *sas[nb_pkts];
+
+ inbound_sa_lookup(ctx->sa_ctx, pkts, sas, nb_pkts);
+
+ return ipsec_processing(ctx, pkts, sas, nb_pkts, len);
+}
+
+uint16_t
+ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+ uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len)
+{
+ struct ipsec_sa *sas[nb_pkts];
+
+ outbound_sa_lookup(ctx->sa_ctx, sa_idx, sas, nb_pkts);
+
+ return ipsec_processing(ctx, pkts, sas, nb_pkts, len);
+}
diff --git a/examples/ipsec-secgw/ipsec.h b/examples/ipsec-secgw/ipsec.h
new file mode 100644
index 00000000..a13fdef9
--- /dev/null
+++ b/examples/ipsec-secgw/ipsec.h
@@ -0,0 +1,190 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __IPSEC_H__
+#define __IPSEC_H__
+
+#include <stdint.h>
+
+#include <rte_byteorder.h>
+#include <rte_ip.h>
+#include <rte_crypto.h>
+
+#define RTE_LOGTYPE_IPSEC RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_IPSEC_ESP RTE_LOGTYPE_USER2
+#define RTE_LOGTYPE_IPSEC_IPIP RTE_LOGTYPE_USER3
+
+#define MAX_PKT_BURST 32
+#define MAX_QP_PER_LCORE 256
+
+#ifdef IPSEC_DEBUG
+#define IPSEC_ASSERT(exp) \
+if (!(exp)) { \
+ rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \
+}
+
+#define IPSEC_LOG RTE_LOG
+#else
+#define IPSEC_ASSERT(exp) do {} while (0)
+#define IPSEC_LOG(...) do {} while (0)
+#endif /* IPSEC_DEBUG */
+
+#define MAX_DIGEST_SIZE 32 /* Bytes -- 256 bits */
+
+#define uint32_t_to_char(ip, a, b, c, d) do {\
+ *a = (unsigned char)(ip >> 24 & 0xff);\
+ *b = (unsigned char)(ip >> 16 & 0xff);\
+ *c = (unsigned char)(ip >> 8 & 0xff);\
+ *d = (unsigned char)(ip & 0xff);\
+ } while (0)
+
+#define DEFAULT_MAX_CATEGORIES 1
+
+#define IPSEC_SA_MAX_ENTRIES (64) /* must be power of 2, max 2 power 30 */
+#define SPI2IDX(spi) (spi & (IPSEC_SA_MAX_ENTRIES - 1))
+#define INVALID_SPI (0)
+
+#define DISCARD (0x80000000)
+#define BYPASS (0x40000000)
+#define PROTECT_MASK (0x3fffffff)
+#define PROTECT(sa_idx) (SPI2IDX(sa_idx) & PROTECT_MASK) /* SA idx 30 bits */
+
+#define IPSEC_XFORM_MAX 2
+
+struct rte_crypto_xform;
+struct ipsec_xform;
+struct rte_cryptodev_session;
+struct rte_mbuf;
+
+struct ipsec_sa;
+
+typedef int (*ipsec_xform_fn)(struct rte_mbuf *m, struct ipsec_sa *sa,
+ struct rte_crypto_op *cop);
+
+struct ipsec_sa {
+ uint32_t spi;
+ uint32_t cdev_id_qp;
+ uint32_t src;
+ uint32_t dst;
+ struct rte_cryptodev_sym_session *crypto_session;
+ struct rte_crypto_sym_xform *xforms;
+ ipsec_xform_fn pre_crypto;
+ ipsec_xform_fn post_crypto;
+ enum rte_crypto_cipher_algorithm cipher_algo;
+ enum rte_crypto_auth_algorithm auth_algo;
+ uint16_t digest_len;
+ uint16_t iv_len;
+ uint16_t block_size;
+ uint16_t flags;
+ uint32_t seq;
+} __rte_cache_aligned;
+
+struct ipsec_mbuf_metadata {
+ struct ipsec_sa *sa;
+ struct rte_crypto_op cop;
+ struct rte_crypto_sym_op sym_cop;
+};
+
+struct cdev_qp {
+ uint16_t id;
+ uint16_t qp;
+ uint16_t in_flight;
+ uint16_t len;
+ struct rte_crypto_op *buf[MAX_PKT_BURST] __rte_aligned(sizeof(void *));
+};
+
+struct ipsec_ctx {
+ struct rte_hash *cdev_map;
+ struct sp_ctx *sp_ctx;
+ struct sa_ctx *sa_ctx;
+ uint16_t nb_qps;
+ uint16_t last_qp;
+ struct cdev_qp tbl[MAX_QP_PER_LCORE];
+};
+
+struct cdev_key {
+ uint16_t lcore_id;
+ uint8_t cipher_algo;
+ uint8_t auth_algo;
+};
+
+struct socket_ctx {
+ struct sa_ctx *sa_ipv4_in;
+ struct sa_ctx *sa_ipv4_out;
+ struct sp_ctx *sp_ipv4_in;
+ struct sp_ctx *sp_ipv4_out;
+ struct rt_ctx *rt_ipv4;
+ struct rte_mempool *mbuf_pool;
+};
+
+uint16_t
+ipsec_inbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t len);
+
+uint16_t
+ipsec_outbound(struct ipsec_ctx *ctx, struct rte_mbuf *pkts[],
+ uint32_t sa_idx[], uint16_t nb_pkts, uint16_t len);
+
+static inline uint16_t
+ipsec_metadata_size(void)
+{
+ return sizeof(struct ipsec_mbuf_metadata);
+}
+
+static inline struct ipsec_mbuf_metadata *
+get_priv(struct rte_mbuf *m)
+{
+ return RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
+}
+
+int
+inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx);
+
+void
+inbound_sa_lookup(struct sa_ctx *sa_ctx, struct rte_mbuf *pkts[],
+ struct ipsec_sa *sa[], uint16_t nb_pkts);
+
+void
+outbound_sa_lookup(struct sa_ctx *sa_ctx, uint32_t sa_idx[],
+ struct ipsec_sa *sa[], uint16_t nb_pkts);
+
+void
+sp_init(struct socket_ctx *ctx, int socket_id, unsigned ep);
+
+void
+sa_init(struct socket_ctx *ctx, int socket_id, unsigned ep);
+
+void
+rt_init(struct socket_ctx *ctx, int socket_id, unsigned ep);
+
+#endif /* __IPSEC_H__ */
diff --git a/examples/ipsec-secgw/rt.c b/examples/ipsec-secgw/rt.c
new file mode 100644
index 00000000..a6d0866a
--- /dev/null
+++ b/examples/ipsec-secgw/rt.c
@@ -0,0 +1,145 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Routing Table (RT)
+ */
+#include <sys/types.h>
+#include <rte_lpm.h>
+#include <rte_errno.h>
+
+#include "ipsec.h"
+
+#define RT_IPV4_MAX_RULES 64
+
+struct ipv4_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+/* Default routing table for endpoint 0 (EP0): ports 0 and 1 are protected,
+ * ports 2 and 3 are unprotected.
+ */
+static struct ipv4_route rt_ipv4_ep0[] = {
+ { IPv4(172, 16, 2, 5), 32, 0 },
+ { IPv4(172, 16, 2, 6), 32, 0 },
+ { IPv4(172, 16, 2, 7), 32, 1 },
+ { IPv4(172, 16, 2, 8), 32, 1 },
+
+ { IPv4(192, 168, 115, 0), 24, 2 },
+ { IPv4(192, 168, 116, 0), 24, 2 },
+ { IPv4(192, 168, 117, 0), 24, 3 },
+ { IPv4(192, 168, 118, 0), 24, 3 },
+
+ { IPv4(192, 168, 210, 0), 24, 2 },
+
+ { IPv4(192, 168, 240, 0), 24, 2 },
+ { IPv4(192, 168, 250, 0), 24, 0 }
+};
+
+/* Default routing table for endpoint 1 (EP1): ports 0 and 1 are protected,
+ * ports 2 and 3 are unprotected.
+ */
+static struct ipv4_route rt_ipv4_ep1[] = {
+ { IPv4(172, 16, 1, 5), 32, 2 },
+ { IPv4(172, 16, 1, 6), 32, 2 },
+ { IPv4(172, 16, 1, 7), 32, 3 },
+ { IPv4(172, 16, 1, 8), 32, 3 },
+
+ { IPv4(192, 168, 105, 0), 24, 0 },
+ { IPv4(192, 168, 106, 0), 24, 0 },
+ { IPv4(192, 168, 107, 0), 24, 1 },
+ { IPv4(192, 168, 108, 0), 24, 1 },
+
+ { IPv4(192, 168, 200, 0), 24, 0 },
+
+ { IPv4(192, 168, 240, 0), 24, 2 },
+ { IPv4(192, 168, 250, 0), 24, 0 }
+};
+
+void
+rt_init(struct socket_ctx *ctx, int socket_id, unsigned ep)
+{
+ char name[PATH_MAX];
+ unsigned i;
+ int ret;
+ struct rte_lpm *lpm;
+ struct ipv4_route *rt;
+ char a, b, c, d;
+ unsigned nb_routes;
+ struct rte_lpm_config conf = { 0 };
+
+ if (ctx == NULL)
+ rte_exit(EXIT_FAILURE, "NULL context.\n");
+
+ if (ctx->rt_ipv4 != NULL)
+ rte_exit(EXIT_FAILURE, "Routing Table for socket %u already "
+ "initialized\n", socket_id);
+
+ printf("Creating Routing Table (RT) context with %u max routes\n",
+ RT_IPV4_MAX_RULES);
+
+ if (ep == 0) {
+ rt = rt_ipv4_ep0;
+ nb_routes = RTE_DIM(rt_ipv4_ep0);
+ } else if (ep == 1) {
+ rt = rt_ipv4_ep1;
+ nb_routes = RTE_DIM(rt_ipv4_ep1);
+ } else
+ rte_exit(EXIT_FAILURE, "Invalid EP value %u. Only 0 or 1 "
+ "supported.\n", ep);
+
+ /* create the LPM table */
+ snprintf(name, sizeof(name), "%s_%u", "rt_ipv4", socket_id);
+ conf.max_rules = RT_IPV4_MAX_RULES;
+ conf.number_tbl8s = RTE_LPM_TBL8_NUM_ENTRIES;
+ lpm = rte_lpm_create(name, socket_id, &conf);
+ if (lpm == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create LPM table "
+ "on socket %d\n", socket_id);
+
+ /* populate the LPM table */
+ for (i = 0; i < nb_routes; i++) {
+ ret = rte_lpm_add(lpm, rt[i].ip, rt[i].depth, rt[i].if_out);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Unable to add entry num %u to "
+ "LPM table on socket %d\n", i, socket_id);
+
+ uint32_t_to_char(rt[i].ip, &a, &b, &c, &d);
+ printf("LPM: Adding route %hhu.%hhu.%hhu.%hhu/%hhu (%hhu)\n",
+ a, b, c, d, rt[i].depth, rt[i].if_out);
+ }
+
+ ctx->rt_ipv4 = (struct rt_ctx *)lpm;
+}
diff --git a/examples/ipsec-secgw/sa.c b/examples/ipsec-secgw/sa.c
new file mode 100644
index 00000000..b6260ede
--- /dev/null
+++ b/examples/ipsec-secgw/sa.c
@@ -0,0 +1,446 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Security Associations
+ */
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <rte_memzone.h>
+#include <rte_crypto.h>
+#include <rte_cryptodev.h>
+#include <rte_byteorder.h>
+#include <rte_errno.h>
+
+#include "ipsec.h"
+#include "esp.h"
+
+/* SAs EP0 Outbound */
+const struct ipsec_sa sa_ep0_out[] = {
+ { 5, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 6, 0, IPv4(172, 16, 1, 6), IPv4(172, 16, 2, 6),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 7, 0, IPv4(172, 16, 1, 7), IPv4(172, 16, 2, 7),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 8, 0, IPv4(172, 16, 1, 8), IPv4(172, 16, 2, 8),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 9, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL,
+ 0, 0, 4,
+ 0, 0 },
+};
+
+/* SAs EP0 Inbound */
+const struct ipsec_sa sa_ep0_in[] = {
+ { 5, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 6, 0, IPv4(172, 16, 2, 6), IPv4(172, 16, 1, 6),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 7, 0, IPv4(172, 16, 2, 7), IPv4(172, 16, 1, 7),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 8, 0, IPv4(172, 16, 2, 8), IPv4(172, 16, 1, 8),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 9, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL,
+ 0, 0, 4,
+ 0, 0 },
+};
+
+/* SAs EP1 Outbound */
+const struct ipsec_sa sa_ep1_out[] = {
+ { 5, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 6, 0, IPv4(172, 16, 2, 6), IPv4(172, 16, 1, 6),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 7, 0, IPv4(172, 16, 2, 7), IPv4(172, 16, 1, 7),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 8, 0, IPv4(172, 16, 2, 8), IPv4(172, 16, 1, 8),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 9, 0, IPv4(172, 16, 2, 5), IPv4(172, 16, 1, 5),
+ NULL, NULL,
+ esp4_tunnel_outbound_pre_crypto,
+ esp4_tunnel_outbound_post_crypto,
+ RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL,
+ 0, 0, 4,
+ 0, 0 },
+};
+
+/* SAs EP1 Inbound */
+const struct ipsec_sa sa_ep1_in[] = {
+ { 5, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 6, 0, IPv4(172, 16, 1, 6), IPv4(172, 16, 2, 6),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 7, 0, IPv4(172, 16, 1, 7), IPv4(172, 16, 2, 7),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 8, 0, IPv4(172, 16, 1, 8), IPv4(172, 16, 2, 8),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_AES_CBC, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ 12, 16, 16,
+ 0, 0 },
+ { 9, 0, IPv4(172, 16, 1, 5), IPv4(172, 16, 2, 5),
+ NULL, NULL,
+ esp4_tunnel_inbound_pre_crypto,
+ esp4_tunnel_inbound_post_crypto,
+ RTE_CRYPTO_CIPHER_NULL, RTE_CRYPTO_AUTH_NULL,
+ 0, 0, 4,
+ 0, 0 },
+};
+
+static uint8_t cipher_key[256] = "sixteenbytes key";
+
+/* AES CBC xform */
+const struct rte_crypto_sym_xform aescbc_enc_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_CIPHER,
+ {.cipher = { RTE_CRYPTO_CIPHER_OP_ENCRYPT, RTE_CRYPTO_CIPHER_AES_CBC,
+ .key = { cipher_key, 16 } }
+ }
+};
+
+const struct rte_crypto_sym_xform aescbc_dec_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_CIPHER,
+ {.cipher = { RTE_CRYPTO_CIPHER_OP_DECRYPT, RTE_CRYPTO_CIPHER_AES_CBC,
+ .key = { cipher_key, 16 } }
+ }
+};
+
+static uint8_t auth_key[256] = "twentybytes hash key";
+
+/* SHA1 HMAC xform */
+const struct rte_crypto_sym_xform sha1hmac_gen_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_AUTH,
+ {.auth = { RTE_CRYPTO_AUTH_OP_GENERATE, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ .key = { auth_key, 20 }, 12, 0 }
+ }
+};
+
+const struct rte_crypto_sym_xform sha1hmac_verify_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_AUTH,
+ {.auth = { RTE_CRYPTO_AUTH_OP_VERIFY, RTE_CRYPTO_AUTH_SHA1_HMAC,
+ .key = { auth_key, 20 }, 12, 0 }
+ }
+};
+
+/* NULL cipher and auth xforms */
+const struct rte_crypto_sym_xform null_cipher_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_CIPHER,
+ {.cipher = { .algo = RTE_CRYPTO_CIPHER_NULL }
+ }
+};
+
+const struct rte_crypto_sym_xform null_auth_xf = {
+ NULL,
+ RTE_CRYPTO_SYM_XFORM_AUTH,
+ {.auth = { .algo = RTE_CRYPTO_AUTH_NULL }
+ }
+};
+
+struct sa_ctx {
+ struct ipsec_sa sa[IPSEC_SA_MAX_ENTRIES];
+ struct {
+ struct rte_crypto_sym_xform a;
+ struct rte_crypto_sym_xform b;
+ } xf[IPSEC_SA_MAX_ENTRIES];
+};
+
+static struct sa_ctx *
+sa_ipv4_create(const char *name, int socket_id)
+{
+ char s[PATH_MAX];
+ struct sa_ctx *sa_ctx;
+ unsigned mz_size;
+ const struct rte_memzone *mz;
+
+ snprintf(s, sizeof(s), "%s_%u", name, socket_id);
+
+ /* Create SA array table */
+ printf("Creating SA context with %u maximum entries\n",
+ IPSEC_SA_MAX_ENTRIES);
+
+ mz_size = sizeof(struct sa_ctx);
+ mz = rte_memzone_reserve(s, mz_size, socket_id,
+ RTE_MEMZONE_1GB | RTE_MEMZONE_SIZE_HINT_ONLY);
+ if (mz == NULL) {
+ printf("Failed to allocate SA DB memory\n");
+ rte_errno = -ENOMEM;
+ return NULL;
+ }
+
+ sa_ctx = (struct sa_ctx *)mz->addr;
+
+ return sa_ctx;
+}
+
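+/* Install SA entries indexed by SPI and build each SA's transform chain:
+ * auth verify followed by decrypt for inbound, encrypt followed by auth
+ * generate for outbound.
+ */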
+static int
+sa_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[],
+ unsigned nb_entries, unsigned inbound)
+{
+ struct ipsec_sa *sa;
+ unsigned i, idx;
+
+ for (i = 0; i < nb_entries; i++) {
+ idx = SPI2IDX(entries[i].spi);
+ sa = &sa_ctx->sa[idx];
+ if (sa->spi != 0) {
+ printf("Index %u already in use by SPI %u\n",
+ idx, sa->spi);
+ return -EINVAL;
+ }
+ *sa = entries[i];
+ sa->src = rte_cpu_to_be_32(sa->src);
+ sa->dst = rte_cpu_to_be_32(sa->dst);
+ if (inbound) {
+ if (sa->cipher_algo == RTE_CRYPTO_CIPHER_NULL) {
+ sa_ctx->xf[idx].a = null_auth_xf;
+ sa_ctx->xf[idx].b = null_cipher_xf;
+ } else {
+ sa_ctx->xf[idx].a = sha1hmac_verify_xf;
+ sa_ctx->xf[idx].b = aescbc_dec_xf;
+ }
+ } else { /* outbound */
+ if (sa->cipher_algo == RTE_CRYPTO_CIPHER_NULL) {
+ sa_ctx->xf[idx].a = null_cipher_xf;
+ sa_ctx->xf[idx].b = null_auth_xf;
+ } else {
+ sa_ctx->xf[idx].a = aescbc_enc_xf;
+ sa_ctx->xf[idx].b = sha1hmac_gen_xf;
+ }
+ }
+ sa_ctx->xf[idx].a.next = &sa_ctx->xf[idx].b;
+ sa_ctx->xf[idx].b.next = NULL;
+ sa->xforms = &sa_ctx->xf[idx].a;
+ }
+
+ return 0;
+}
+
+static inline int
+sa_out_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[],
+ unsigned nb_entries)
+{
+ return sa_add_rules(sa_ctx, entries, nb_entries, 0);
+}
+
+static inline int
+sa_in_add_rules(struct sa_ctx *sa_ctx, const struct ipsec_sa entries[],
+ unsigned nb_entries)
+{
+ return sa_add_rules(sa_ctx, entries, nb_entries, 1);
+}
+
+void
+sa_init(struct socket_ctx *ctx, int socket_id, unsigned ep)
+{
+ const struct ipsec_sa *sa_out_entries, *sa_in_entries;
+ unsigned nb_out_entries, nb_in_entries;
+ const char *name;
+
+ if (ctx == NULL)
+ rte_exit(EXIT_FAILURE, "NULL context.\n");
+
+ if (ctx->sa_ipv4_in != NULL)
+ rte_exit(EXIT_FAILURE, "Inbound SA DB for socket %u already "
+ "initialized\n", socket_id);
+
+ if (ctx->sa_ipv4_out != NULL)
+ rte_exit(EXIT_FAILURE, "Outbound SA DB for socket %u already "
+ "initialized\n", socket_id);
+
+ if (ep == 0) {
+ sa_out_entries = sa_ep0_out;
+ nb_out_entries = RTE_DIM(sa_ep0_out);
+ sa_in_entries = sa_ep0_in;
+ nb_in_entries = RTE_DIM(sa_ep0_in);
+ } else if (ep == 1) {
+ sa_out_entries = sa_ep1_out;
+ nb_out_entries = RTE_DIM(sa_ep1_out);
+ sa_in_entries = sa_ep1_in;
+ nb_in_entries = RTE_DIM(sa_ep1_in);
+ } else
+ rte_exit(EXIT_FAILURE, "Invalid EP value %u. "
+ "Only 0 or 1 supported.\n", ep);
+
+ name = "sa_ipv4_in";
+ ctx->sa_ipv4_in = sa_ipv4_create(name, socket_id);
+ if (ctx->sa_ipv4_in == NULL)
+ rte_exit(EXIT_FAILURE, "Error [%d] creating SA context %s "
+ "in socket %d\n", rte_errno, name, socket_id);
+
+ name = "sa_ipv4_out";
+ ctx->sa_ipv4_out = sa_ipv4_create(name, socket_id);
+ if (ctx->sa_ipv4_out == NULL)
+ rte_exit(EXIT_FAILURE, "Error [%d] creating SA context %s "
+ "in socket %d\n", rte_errno, name, socket_id);
+
+ sa_in_add_rules(ctx->sa_ipv4_in, sa_in_entries, nb_in_entries);
+
+ sa_out_add_rules(ctx->sa_ipv4_out, sa_out_entries, nb_out_entries);
+}
+
+int
+inbound_sa_check(struct sa_ctx *sa_ctx, struct rte_mbuf *m, uint32_t sa_idx)
+{
+ struct ipsec_mbuf_metadata *priv;
+
+ priv = RTE_PTR_ADD(m, sizeof(struct rte_mbuf));
+
+ return (sa_ctx->sa[sa_idx].spi == priv->sa->spi);
+}
+
+void
+inbound_sa_lookup(struct sa_ctx *sa_ctx, struct rte_mbuf *pkts[],
+ struct ipsec_sa *sa[], uint16_t nb_pkts)
+{
+ unsigned i;
+ uint32_t *src, spi;
+
+ for (i = 0; i < nb_pkts; i++) {
+ spi = rte_pktmbuf_mtod_offset(pkts[i], struct esp_hdr *,
+ sizeof(struct ip))->spi;
+
+ if (spi == INVALID_SPI)
+ continue;
+
+ sa[i] = &sa_ctx->sa[SPI2IDX(spi)];
+ if (spi != sa[i]->spi) {
+ sa[i] = NULL;
+ continue;
+ }
+
+ src = rte_pktmbuf_mtod_offset(pkts[i], uint32_t *,
+ offsetof(struct ip, ip_src));
+ if ((sa[i]->src != *src) || (sa[i]->dst != *(src + 1)))
+ sa[i] = NULL;
+ }
+}
+
+void
+outbound_sa_lookup(struct sa_ctx *sa_ctx, uint32_t sa_idx[],
+ struct ipsec_sa *sa[], uint16_t nb_pkts)
+{
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ sa[i] = &sa_ctx->sa[sa_idx[i]];
+}
diff --git a/examples/ipsec-secgw/sp.c b/examples/ipsec-secgw/sp.c
new file mode 100644
index 00000000..4f167301
--- /dev/null
+++ b/examples/ipsec-secgw/sp.c
@@ -0,0 +1,366 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Security Policies
+ */
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+
+#include <rte_acl.h>
+
+#include "ipsec.h"
+
+#define MAX_ACL_RULE_NUM 1000
+
+/*
+ * Rule and trace format definitions.
+ */
+enum {
+ PROTO_FIELD_IPV4,
+ SRC_FIELD_IPV4,
+ DST_FIELD_IPV4,
+ SRCP_FIELD_IPV4,
+ DSTP_FIELD_IPV4,
+ NUM_FIELDS_IPV4
+};
+
+/*
+ * This defines the order of the IPv4 classification inputs:
+ * - PROTO
+ * - SRC IP ADDRESS
+ * - DST IP ADDRESS
+ * - PORTS (SRC and DST)
+ */
+enum {
+ RTE_ACL_IPV4_PROTO,
+ RTE_ACL_IPV4_SRC,
+ RTE_ACL_IPV4_DST,
+ RTE_ACL_IPV4_PORTS,
+ RTE_ACL_IPV4_NUM
+};
+
+struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
+ {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = PROTO_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4_PROTO,
+ .offset = 0,
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4_SRC,
+ .offset = offsetof(struct ip, ip_src) - offsetof(struct ip, ip_p)
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4_DST,
+ .offset = offsetof(struct ip, ip_dst) - offsetof(struct ip, ip_p)
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = SRCP_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4_PORTS,
+ .offset = sizeof(struct ip) - offsetof(struct ip, ip_p)
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = DSTP_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4_PORTS,
+ .offset = sizeof(struct ip) - offsetof(struct ip, ip_p) +
+ sizeof(uint16_t)
+ },
+};
+
+RTE_ACL_RULE_DEF(acl4_rules, RTE_DIM(ipv4_defs));
+
+const struct acl4_rules acl4_rules_in[] = {
+ {
+ .data = {.userdata = PROTECT(5), .category_mask = 1, .priority = 1},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 105, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(6), .category_mask = 1, .priority = 2},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 106, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(7), .category_mask = 1, .priority = 3},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 107, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(8), .category_mask = 1, .priority = 4},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 108, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(9), .category_mask = 1, .priority = 5},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 200, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = BYPASS, .category_mask = 1, .priority = 6},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 250, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ }
+};
+
+const struct acl4_rules acl4_rules_out[] = {
+ {
+ .data = {.userdata = PROTECT(5), .category_mask = 1, .priority = 1},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 115, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(6), .category_mask = 1, .priority = 2},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 116, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(7), .category_mask = 1, .priority = 3},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 117, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(8), .category_mask = 1, .priority = 4},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 118, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = PROTECT(9), .category_mask = 1, .priority = 5},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 210, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ },
+ {
+ .data = {.userdata = BYPASS, .category_mask = 1, .priority = 6},
+ /* destination IPv4 */
+ .field[2] = {.value.u32 = IPv4(192, 168, 240, 0),
+ .mask_range.u32 = 24,},
+ /* source port */
+ .field[3] = {.value.u16 = 0, .mask_range.u16 = 0xffff,},
+ /* destination port */
+ .field[4] = {.value.u16 = 0, .mask_range.u16 = 0xffff,}
+ }
+};
+
+static void
+print_one_ipv4_rule(const struct acl4_rules *rule, int extra)
+{
+ unsigned char a, b, c, d;
+
+ uint32_t_to_char(rule->field[SRC_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d,
+ rule->field[SRC_FIELD_IPV4].mask_range.u32);
+ uint32_t_to_char(rule->field[DST_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d,
+ rule->field[DST_FIELD_IPV4].mask_range.u32);
+ printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ",
+ rule->field[SRCP_FIELD_IPV4].value.u16,
+ rule->field[SRCP_FIELD_IPV4].mask_range.u16,
+ rule->field[DSTP_FIELD_IPV4].value.u16,
+ rule->field[DSTP_FIELD_IPV4].mask_range.u16,
+ rule->field[PROTO_FIELD_IPV4].value.u8,
+ rule->field[PROTO_FIELD_IPV4].mask_range.u8);
+ if (extra)
+ printf("0x%x-0x%x-0x%x ",
+ rule->data.category_mask,
+ rule->data.priority,
+ rule->data.userdata);
+}
+
+static inline void
+dump_ipv4_rules(const struct acl4_rules *rule, int num, int extra)
+{
+ int i;
+
+ for (i = 0; i < num; i++, rule++) {
+ printf("\t%d:", i + 1);
+ print_one_ipv4_rule(rule, extra);
+ printf("\n");
+ }
+}
+
+static struct rte_acl_ctx *
+acl4_init(const char *name, int socketid, const struct acl4_rules *rules,
+ unsigned rules_nb)
+{
+ char s[PATH_MAX];
+ struct rte_acl_param acl_param;
+ struct rte_acl_config acl_build_param;
+ struct rte_acl_ctx *ctx;
+
+ printf("Creating SP context with %u max rules\n", MAX_ACL_RULE_NUM);
+
+ memset(&acl_param, 0, sizeof(acl_param));
+
+ /* Create ACL contexts */
+ snprintf(s, sizeof(s), "%s_%d", name, socketid);
+
+ printf("IPv4 %s entries [%u]:\n", s, rules_nb);
+ dump_ipv4_rules(rules, rules_nb, 1);
+
+ acl_param.name = s;
+ acl_param.socket_id = socketid;
+ acl_param.rule_size = RTE_ACL_RULE_SZ(RTE_DIM(ipv4_defs));
+ acl_param.max_rule_num = MAX_ACL_RULE_NUM;
+
+ ctx = rte_acl_create(&acl_param);
+ if (ctx == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to create ACL context\n");
+
+ if (rte_acl_add_rules(ctx, (const struct rte_acl_rule *)rules,
+ rules_nb) < 0)
+ rte_exit(EXIT_FAILURE, "add rules failed\n");
+
+ /* Perform builds */
+ memset(&acl_build_param, 0, sizeof(acl_build_param));
+
+ acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES;
+ acl_build_param.num_fields = RTE_DIM(ipv4_defs);
+ memcpy(&acl_build_param.defs, ipv4_defs, sizeof(ipv4_defs));
+
+ if (rte_acl_build(ctx, &acl_build_param) != 0)
+ rte_exit(EXIT_FAILURE, "Failed to build ACL trie\n");
+
+ rte_acl_dump(ctx);
+
+ return ctx;
+}
+
+void
+sp_init(struct socket_ctx *ctx, int socket_id, unsigned ep)
+{
+ const char *name;
+ const struct acl4_rules *rules_out, *rules_in;
+ unsigned nb_out_rules, nb_in_rules;
+
+ if (ctx == NULL)
+ rte_exit(EXIT_FAILURE, "NULL context.\n");
+
+ if (ctx->sp_ipv4_in != NULL)
+ rte_exit(EXIT_FAILURE, "Inbound SP DB for socket %u already "
+ "initialized\n", socket_id);
+
+ if (ctx->sp_ipv4_out != NULL)
+ rte_exit(EXIT_FAILURE, "Outbound SP DB for socket %u already "
+ "initialized\n", socket_id);
+
+ if (ep == 0) {
+ rules_out = acl4_rules_in;
+ nb_out_rules = RTE_DIM(acl4_rules_in);
+ rules_in = acl4_rules_out;
+ nb_in_rules = RTE_DIM(acl4_rules_out);
+ } else if (ep == 1) {
+ rules_out = acl4_rules_out;
+ nb_out_rules = RTE_DIM(acl4_rules_out);
+ rules_in = acl4_rules_in;
+ nb_in_rules = RTE_DIM(acl4_rules_in);
+ } else
+ rte_exit(EXIT_FAILURE, "Invalid EP value %u. "
+ "Only 0 or 1 supported.\n", ep);
+
+ name = "sp_ipv4_in";
+ ctx->sp_ipv4_in = (struct sp_ctx *)acl4_init(name, socket_id,
+ rules_in, nb_in_rules);
+
+ name = "sp_ipv4_out";
+ ctx->sp_ipv4_out = (struct sp_ctx *)acl4_init(name, socket_id,
+ rules_out, nb_out_rules);
+}
diff --git a/examples/ipv4_multicast/Makefile b/examples/ipv4_multicast/Makefile
new file mode 100644
index 00000000..44f0a3bb
--- /dev/null
+++ b/examples/ipv4_multicast/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ipv4_multicast
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ipv4_multicast/main.c b/examples/ipv4_multicast/main.c
new file mode 100644
index 00000000..96b41578
--- /dev/null
+++ b/examples/ipv4_multicast/main.c
@@ -0,0 +1,819 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_fbk_hash.h>
+#include <rte_ip.h>
+
+#define RTE_LOGTYPE_IPv4_MULTICAST RTE_LOGTYPE_USER1
+
+#define MAX_PORTS 16
+
+#define MCAST_CLONE_PORTS 2
+#define MCAST_CLONE_SEGS 2
+
+#define PKT_MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
+#define NB_PKT_MBUF 8192
+
+#define HDR_MBUF_DATA_SIZE (2 * RTE_PKTMBUF_HEADROOM)
+#define NB_HDR_MBUF (NB_PKT_MBUF * MAX_PORTS)
+
+#define NB_CLONE_MBUF (NB_PKT_MBUF * MCAST_CLONE_PORTS * MCAST_CLONE_SEGS * 2)
+
+/* allow max jumbo frame 9.5 KB */
+#define JUMBO_FRAME_MAX_SIZE 0x2600
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Construct Ethernet multicast address from IPv4 multicast address.
+ * Citing RFC 1112, section 6.4:
+ * "An IP host group address is mapped to an Ethernet multicast address
+ * by placing the low-order 23-bits of the IP address into the low-order
+ * 23 bits of the Ethernet multicast address 01-00-5E-00-00-00 (hex)."
+ */
+#define ETHER_ADDR_FOR_IPV4_MCAST(x) \
+ (rte_cpu_to_be_64(0x01005e000000ULL | ((x) & 0x7fffff)) >> 16)
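+/*
+ * For example, the group address 224.0.0.101 (0xe0000065) maps to the
+ * Ethernet multicast address 01:00:5e:00:00:65.
+ */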
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[MAX_PORTS];
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+
+static uint8_t nb_ports = 0;
+
+static int rx_queue_per_lcore = 1;
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ uint64_t tx_tsc;
+ uint16_t n_rx_queue;
+ uint8_t rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t tx_queue_id[MAX_PORTS];
+ struct mbuf_table tx_mbufs[MAX_PORTS];
+} __rte_cache_aligned;
+static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .max_rx_pkt_len = JUMBO_FRAME_MAX_SIZE,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 1, /**< Jumbo Frame Support enabled */
+		.hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool *packet_pool, *header_pool, *clone_pool;
+
+
+/* Multicast */
+static struct rte_fbk_hash_params mcast_hash_params = {
+ .name = "MCAST_HASH",
+ .entries = 1024,
+ .entries_per_bucket = 4,
+ .socket_id = 0,
+ .hash_func = NULL,
+ .init_val = 0,
+};
+
+struct rte_fbk_hash_table *mcast_hash = NULL;
+
+struct mcast_group_params {
+ uint32_t ip;
+ uint16_t port_mask;
+};
+
+static struct mcast_group_params mcast_group_table[] = {
+ {IPv4(224,0,0,101), 0x1},
+ {IPv4(224,0,0,102), 0x2},
+ {IPv4(224,0,0,103), 0x3},
+ {IPv4(224,0,0,104), 0x4},
+ {IPv4(224,0,0,105), 0x5},
+ {IPv4(224,0,0,106), 0x6},
+ {IPv4(224,0,0,107), 0x7},
+ {IPv4(224,0,0,108), 0x8},
+ {IPv4(224,0,0,109), 0x9},
+ {IPv4(224,0,0,110), 0xA},
+ {IPv4(224,0,0,111), 0xB},
+ {IPv4(224,0,0,112), 0xC},
+ {IPv4(224,0,0,113), 0xD},
+ {IPv4(224,0,0,114), 0xE},
+ {IPv4(224,0,0,115), 0xF},
+};
+
+#define N_MCAST_GROUPS \
+ (sizeof (mcast_group_table) / sizeof (mcast_group_table[0]))
+
+
+/* Send burst of packets on an output interface */
+static void
+send_burst(struct lcore_queue_conf *qconf, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ uint16_t n, queueid;
+ int ret;
+
+ queueid = qconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+ n = qconf->tx_mbufs[port].len;
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
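+	/* Free any mbufs that the driver did not accept for transmission. */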
+ while (unlikely (ret < n)) {
+ rte_pktmbuf_free(m_table[ret]);
+ ret++;
+ }
+
+ qconf->tx_mbufs[port].len = 0;
+}
+
+/* Get number of bits set. */
+static inline uint32_t
+bitcnt(uint32_t v)
+{
+ uint32_t n;
+
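+	/* Kernighan's trick: v &= v - 1 clears the least-significant set bit. */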
+ for (n = 0; v != 0; v &= v - 1, n++)
+ ;
+
+ return n;
+}
+
+/**
+ * Create the output multicast packet based on the given input packet.
+ * There are two approaches for creating the outgoing packet; both are
+ * based on the data zero-copy idea, but they differ in a few details:
+ * The first one creates a clone of the input packet, i.e. it walks through
+ * all segments of the input packet and, for each of them, creates a new
+ * mbuf and attaches that new mbuf to the segment (refer to
+ * rte_pktmbuf_clone() for more details). A new mbuf is then allocated for
+ * the packet header and prepended to the 'clone' mbuf.
+ * The second approach doesn't make a clone; it just increments the refcnt
+ * of all input packet segments, then allocates a new mbuf for the packet
+ * header and prepends it to the input packet.
+ * Basically, the first approach reuses only the input packet's data, but
+ * creates its own copy of the packet's metadata. The second approach
+ * reuses both the input packet's data and metadata.
+ * The advantage of the first approach is that each outgoing packet has its
+ * own copy of the metadata, so we can safely modify the data pointer of
+ * the input packet. That allows us to skip creation of the output packet
+ * for the last destination port and instead modify the input packet's
+ * header in place, i.e. for N destination ports we need to invoke
+ * mcast_out_pkt() (N-1) times.
+ * The advantage of the second approach is less work per outgoing packet,
+ * i.e. we skip the "clone" operation completely. Though it comes with a
+ * price: the input packet's metadata has to stay intact, so for N
+ * destination ports we need to invoke mcast_out_pkt() N times.
+ * So for a small number of outgoing ports (and segments in the input
+ * packet) the first approach will be faster.
+ * As the number of outgoing ports (and/or input segments) grows, the
+ * second approach becomes preferable.
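+ *
+ * For example, with use_clone == 1 and three destination ports, the caller
+ * invokes mcast_out_pkt() twice and reuses the input packet for the third
+ * port.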
+ *
+ * @param pkt
+ * Input packet mbuf.
+ * @param use_clone
+ * Control which of the two approaches described above should be used:
+ * - 0 - use second approach:
+ * Don't "clone" input packet.
+ * Prepend new header directly to the input packet
+ * - 1 - use first approach:
+ * Make a "clone" of input packet first.
+ * Prepend new header to the clone of the input packet
+ * @return
+ * - The pointer to the new outgoing packet.
+ * - NULL if operation failed.
+ */
+static inline struct rte_mbuf *
+mcast_out_pkt(struct rte_mbuf *pkt, int use_clone)
+{
+ struct rte_mbuf *hdr;
+
+ /* Create new mbuf for the header. */
+ if (unlikely ((hdr = rte_pktmbuf_alloc(header_pool)) == NULL))
+ return NULL;
+
+ /* If requested, then make a new clone packet. */
+ if (use_clone != 0 &&
+ unlikely ((pkt = rte_pktmbuf_clone(pkt, clone_pool)) == NULL)) {
+ rte_pktmbuf_free(hdr);
+ return NULL;
+ }
+
+ /* prepend new header */
+ hdr->next = pkt;
+
+
+ /* update header's fields */
+ hdr->pkt_len = (uint16_t)(hdr->data_len + pkt->pkt_len);
+ hdr->nb_segs = (uint8_t)(pkt->nb_segs + 1);
+
+	/* copy metadata from source packet */
+ hdr->port = pkt->port;
+ hdr->vlan_tci = pkt->vlan_tci;
+ hdr->vlan_tci_outer = pkt->vlan_tci_outer;
+ hdr->tx_offload = pkt->tx_offload;
+ hdr->hash = pkt->hash;
+
+ hdr->ol_flags = pkt->ol_flags;
+
+ __rte_mbuf_sanity_check(hdr, 1);
+ return hdr;
+}
+
+/*
+ * Write new Ethernet header to the outgoing packet,
+ * and put it into the outgoing queue for the given port.
+ */
+static inline void
+mcast_send_pkt(struct rte_mbuf *pkt, struct ether_addr *dest_addr,
+ struct lcore_queue_conf *qconf, uint8_t port)
+{
+ struct ether_hdr *ethdr;
+ uint16_t len;
+
+ /* Construct Ethernet header. */
+ ethdr = (struct ether_hdr *)rte_pktmbuf_prepend(pkt, (uint16_t)sizeof(*ethdr));
+ RTE_MBUF_ASSERT(ethdr != NULL);
+
+ ether_addr_copy(dest_addr, &ethdr->d_addr);
+ ether_addr_copy(&ports_eth_addr[port], &ethdr->s_addr);
+ ethdr->ether_type = rte_be_to_cpu_16(ETHER_TYPE_IPv4);
+
+ /* Put new packet into the output queue */
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = pkt;
+ qconf->tx_mbufs[port].len = ++len;
+
+ /* Transmit packets */
+ if (unlikely(MAX_PKT_BURST == len))
+ send_burst(qconf, port);
+}
+
+/* Multicast forwarding of the input packet */
+static inline void
+mcast_forward(struct rte_mbuf *m, struct lcore_queue_conf *qconf)
+{
+ struct rte_mbuf *mc;
+ struct ipv4_hdr *iphdr;
+ uint32_t dest_addr, port_mask, port_num, use_clone;
+ int32_t hash;
+ uint8_t port;
+ union {
+ uint64_t as_int;
+ struct ether_addr as_addr;
+ } dst_eth_addr;
+
+ /* Remove the Ethernet header from the input packet */
+ iphdr = (struct ipv4_hdr *)rte_pktmbuf_adj(m, (uint16_t)sizeof(struct ether_hdr));
+ RTE_MBUF_ASSERT(iphdr != NULL);
+
+ dest_addr = rte_be_to_cpu_32(iphdr->dst_addr);
+
+ /*
+ * Check that it is a valid multicast address and
+ * we have some active ports assigned to it.
+ */
+ if(!IS_IPV4_MCAST(dest_addr) ||
+ (hash = rte_fbk_hash_lookup(mcast_hash, dest_addr)) <= 0 ||
+ (port_mask = hash & enabled_port_mask) == 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+
+ /* Calculate number of destination ports. */
+ port_num = bitcnt(port_mask);
+
+ /* Should we use rte_pktmbuf_clone() or not. */
+ use_clone = (port_num <= MCAST_CLONE_PORTS &&
+ m->nb_segs <= MCAST_CLONE_SEGS);
+
+ /* Mark all packet's segments as referenced port_num times */
+ if (use_clone == 0)
+ rte_pktmbuf_refcnt_update(m, (uint16_t)port_num);
+
+ /* construct destination ethernet address */
+ dst_eth_addr.as_int = ETHER_ADDR_FOR_IPV4_MCAST(dest_addr);
+
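+	/*
+	 * The loop stops when port_mask reaches use_clone: with use_clone == 0
+	 * every destination port is handled here, while with use_clone == 1 the
+	 * last port is left for the input packet itself (see below).
+	 */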
+ for (port = 0; use_clone != port_mask; port_mask >>= 1, port++) {
+
+ /* Prepare output packet and send it out. */
+ if ((port_mask & 1) != 0) {
+ if (likely ((mc = mcast_out_pkt(m, use_clone)) != NULL))
+ mcast_send_pkt(mc, &dst_eth_addr.as_addr,
+ qconf, port);
+ else if (use_clone == 0)
+ rte_pktmbuf_free(m);
+ }
+ }
+
+ /*
+	 * If we are making clone packets, then for the last destination port
+	 * we can overwrite the input packet's metadata.
+ */
+ if (use_clone != 0)
+ mcast_send_pkt(m, &dst_eth_addr.as_addr, qconf, port);
+ else
+ rte_pktmbuf_free(m);
+}
+
+/* Send a burst of outgoing packets if the timeout has expired. */
+static inline void
+send_timeout_burst(struct lcore_queue_conf *qconf)
+{
+ uint64_t cur_tsc;
+ uint8_t portid;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ cur_tsc = rte_rdtsc();
+ if (likely (cur_tsc < qconf->tx_tsc + drain_tsc))
+ return;
+
+ for (portid = 0; portid < MAX_PORTS; portid++) {
+ if (qconf->tx_mbufs[portid].len != 0)
+ send_burst(qconf, portid);
+ }
+ qconf->tx_tsc = cur_tsc;
+}
+
+/* main processing loop */
+static int
+main_loop(__rte_unused void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ int i, j, nb_rx;
+ uint8_t portid;
+ struct lcore_queue_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, IPv4_MULTICAST, "lcore %u has nothing to do\n",
+ lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, IPv4_MULTICAST, "entering main loop on lcore %u\n",
+ lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i];
+ RTE_LOG(INFO, IPv4_MULTICAST, " -- lcoreid=%u portid=%d\n",
+ lcore_id, (int) portid);
+ }
+
+ while (1) {
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i];
+ nb_rx = rte_eth_rx_burst(portid, 0, pkts_burst,
+ MAX_PKT_BURST);
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_burst[j], void *));
+ }
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ mcast_forward(pkts_burst[j], qconf);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++) {
+ mcast_forward(pkts_burst[j], qconf);
+ }
+ }
+
+ /* Send out packets from TX queues */
+ send_timeout_burst(qconf);
+ }
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+	       " -q NQ: number of queues (=ports) per lcore (default is 1)\n",
+ prgname);
+}
+
+static uint32_t
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+
+ return (uint32_t)pm;
+}
+
+static int
+parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse numerical string */
+ errno = 0;
+ n = strtoul(q_arg, &end, 0);
+ if (errno != 0 || end == NULL || *end != '\0' ||
+ n == 0 || n >= MAX_RX_QUEUE_PER_LCORE)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ rx_queue_per_lcore = parse_nqueue(optarg);
+ if (rx_queue_per_lcore < 0) {
+ printf("invalid queue number\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+static int
+init_mcast_hash(void)
+{
+ uint32_t i;
+
+ mcast_hash_params.socket_id = rte_socket_id();
+ mcast_hash = rte_fbk_hash_create(&mcast_hash_params);
+ if (mcast_hash == NULL){
+ return -1;
+ }
+
+ for (i = 0; i < N_MCAST_GROUPS; i ++){
+ if (rte_fbk_hash_add_key(mcast_hash,
+ mcast_group_table[i].ip,
+ mcast_group_table[i].port_mask) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+/* Check the link status of all ports for up to 9 s, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+				("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ uint16_t queueid;
+ unsigned lcore_id = 0, rx_lcore_id = 0;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid IPV4_MULTICAST parameters\n");
+
+ /* create the mbuf pools */
+ packet_pool = rte_pktmbuf_pool_create("packet_pool", NB_PKT_MBUF, 32,
+ 0, PKT_MBUF_DATA_SIZE, rte_socket_id());
+
+ if (packet_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init packet mbuf pool\n");
+
+ header_pool = rte_pktmbuf_pool_create("header_pool", NB_HDR_MBUF, 32,
+ 0, HDR_MBUF_DATA_SIZE, rte_socket_id());
+
+ if (header_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init header mbuf pool\n");
+
+ clone_pool = rte_pktmbuf_pool_create("clone_pool", NB_CLONE_MBUF, 32,
+ 0, 0, rte_socket_id());
+
+ if (clone_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init clone mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No physical ports!\n");
+ if (nb_ports > MAX_PORTS)
+ nb_ports = MAX_PORTS;
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %d\n", portid);
+ continue;
+ }
+
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ qconf->n_rx_queue == (unsigned)rx_queue_per_lcore) {
+
+ rx_lcore_id ++;
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+ qconf->rx_queue_list[qconf->n_rx_queue] = portid;
+ qconf->n_rx_queue++;
+
+ /* init port */
+ printf("Initializing port %d on lcore %u... ", portid,
+ rx_lcore_id);
+ fflush(stdout);
+
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ ret = rte_eth_dev_configure(portid, 1, (uint16_t)n_tx_queue,
+ &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+
+ /* init one RX queue */
+ queueid = 0;
+ printf("rxq=%hu ", queueid);
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ packet_pool);
+ if (ret < 0)
+			rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, port=%d\n",
+ ret, portid);
+
+ /* init one TX queue per couple (lcore,port) */
+ queueid = 0;
+
+ RTE_LCORE_FOREACH(lcore_id) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ printf("txq=%u,%hu ", lcore_id, queueid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ rte_lcore_to_socket_id(lcore_id), txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_queue_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+ }
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ printf("done:\n");
+ }
+
+ check_all_ports_link_status(nb_ports, enabled_port_mask);
+
+ /* initialize the multicast hash */
+ int retval = init_mcast_hash();
+ if (retval != 0)
+ rte_exit(EXIT_FAILURE, "Cannot build the multicast hash\n");
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/kni/Makefile b/examples/kni/Makefile
new file mode 100644
index 00000000..6800dd5c
--- /dev/null
+++ b/examples/kni/Makefile
@@ -0,0 +1,55 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# binary name
+APP = kni
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/kni/main.c b/examples/kni/main.c
new file mode 100644
index 00000000..a5297f28
--- /dev/null
+++ b/examples/kni/main.c
@@ -0,0 +1,928 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <netinet/in.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+#include <rte_malloc.h>
+#include <rte_kni.h>
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+/* Max size of a single packet */
+#define MAX_PACKET_SZ 2048
+
+/* Size of the data buffer in each mbuf */
+#define MBUF_DATA_SZ (MAX_PACKET_SZ + RTE_PKTMBUF_HEADROOM)
+
+/* Number of mbufs in mempool that is created */
+#define NB_MBUF (8192 * 16)
+
+/* How many packets to attempt to read from NIC in one go */
+#define PKT_BURST_SZ 32
+
+/* How many objects (mbufs) to keep in per-lcore mempool cache */
+#define MEMPOOL_CACHE_SZ PKT_BURST_SZ
+
+/* Number of RX ring descriptors */
+#define NB_RXD 128
+
+/* Number of TX ring descriptors */
+#define NB_TXD 512
+
+/* Total octets in ethernet header */
+#define KNI_ENET_HEADER_SIZE 14
+
+/* Total octets in the FCS */
+#define KNI_ENET_FCS_SIZE 4
+
+#define KNI_US_PER_SECOND 1000000
+#define KNI_SECOND_PER_DAY 86400
+
+#define KNI_MAX_KTHREAD 32
+/*
+ * Structure of port parameters
+ */
+struct kni_port_params {
+	uint8_t port_id;	/* Port ID */
+ unsigned lcore_rx; /* lcore ID for RX */
+ unsigned lcore_tx; /* lcore ID for TX */
+ uint32_t nb_lcore_k; /* Number of lcores for KNI multi kernel threads */
+ uint32_t nb_kni; /* Number of KNI devices to be created */
+ unsigned lcore_k[KNI_MAX_KTHREAD]; /* lcore ID list for kthreads */
+ struct rte_kni *kni[KNI_MAX_KTHREAD]; /* KNI context pointers */
+} __rte_cache_aligned;
+
+static struct kni_port_params *kni_port_params_array[RTE_MAX_ETHPORTS];
+
+
+/* Options for configuring ethernet port */
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .header_split = 0, /* Header Split disabled */
+ .hw_ip_checksum = 0, /* IP checksum offload disabled */
+ .hw_vlan_filter = 0, /* VLAN filtering disabled */
+ .jumbo_frame = 0, /* Jumbo Frame Support disabled */
+		.hw_strip_crc = 0, /* CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+/* Mempool for mbufs */
+static struct rte_mempool * pktmbuf_pool = NULL;
+
+/* Mask of enabled ports */
+static uint32_t ports_mask = 0;
+/* Promiscuous mode on the ports is off by default. */
+static int promiscuous_on = 0;
+
+/* Structure type for recording kni interface specific stats */
+struct kni_interface_stats {
+ /* number of pkts received from NIC, and sent to KNI */
+ uint64_t rx_packets;
+
+ /* number of pkts received from NIC, but failed to send to KNI */
+ uint64_t rx_dropped;
+
+ /* number of pkts received from KNI, and sent to NIC */
+ uint64_t tx_packets;
+
+ /* number of pkts received from KNI, but failed to send to NIC */
+ uint64_t tx_dropped;
+};
+
+/* kni device statistics array */
+static struct kni_interface_stats kni_stats[RTE_MAX_ETHPORTS];
+
+static int kni_change_mtu(uint8_t port_id, unsigned new_mtu);
+static int kni_config_network_interface(uint8_t port_id, uint8_t if_up);
+
+static rte_atomic32_t kni_stop = RTE_ATOMIC32_INIT(0);
+
+/* Print out statistics on packets handled */
+static void
+print_stats(void)
+{
+ uint8_t i;
+
+ printf("\n**KNI example application statistics**\n"
+ "====== ============== ============ ============ ============ ============\n"
+ " Port Lcore(RX/TX) rx_packets rx_dropped tx_packets tx_dropped\n"
+ "------ -------------- ------------ ------------ ------------ ------------\n");
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (!kni_port_params_array[i])
+ continue;
+
+ printf("%7d %10u/%2u %13"PRIu64" %13"PRIu64" %13"PRIu64" "
+ "%13"PRIu64"\n", i,
+ kni_port_params_array[i]->lcore_rx,
+ kni_port_params_array[i]->lcore_tx,
+ kni_stats[i].rx_packets,
+ kni_stats[i].rx_dropped,
+ kni_stats[i].tx_packets,
+ kni_stats[i].tx_dropped);
+ }
+ printf("====== ============== ============ ============ ============ ============\n");
+}
+
+/* Custom handling of signals to handle stats and kni processing */
+static void
+signal_handler(int signum)
+{
+ /* When we receive a USR1 signal, print stats */
+ if (signum == SIGUSR1) {
+ print_stats();
+ }
+
+ /* When we receive a USR2 signal, reset stats */
+ if (signum == SIGUSR2) {
+ memset(&kni_stats, 0, sizeof(kni_stats));
+ printf("\n**Statistics have been reset**\n");
+ return;
+ }
+
+ /* When we receive a RTMIN or SIGINT signal, stop kni processing */
+	if (signum == SIGRTMIN || signum == SIGINT) {
+		printf("SIGRTMIN/SIGINT received, the KNI processing is "
+			"going to stop\n");
+ rte_atomic32_inc(&kni_stop);
+ return;
+ }
+}
+
+static void
+kni_burst_free_mbufs(struct rte_mbuf **pkts, unsigned num)
+{
+ unsigned i;
+
+ if (pkts == NULL)
+ return;
+
+ for (i = 0; i < num; i++) {
+ rte_pktmbuf_free(pkts[i]);
+ pkts[i] = NULL;
+ }
+}
+
+/**
+ * Interface to burst rx and enqueue mbufs into rx_q
+ */
+static void
+kni_ingress(struct kni_port_params *p)
+{
+ uint8_t i, port_id;
+ unsigned nb_rx, num;
+ uint32_t nb_kni;
+ struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
+
+ if (p == NULL)
+ return;
+
+ nb_kni = p->nb_kni;
+ port_id = p->port_id;
+ for (i = 0; i < nb_kni; i++) {
+ /* Burst rx from eth */
+ nb_rx = rte_eth_rx_burst(port_id, 0, pkts_burst, PKT_BURST_SZ);
+ if (unlikely(nb_rx > PKT_BURST_SZ)) {
+ RTE_LOG(ERR, APP, "Error receiving from eth\n");
+ return;
+ }
+ /* Burst tx to kni */
+ num = rte_kni_tx_burst(p->kni[i], pkts_burst, nb_rx);
+ kni_stats[port_id].rx_packets += num;
+
+ rte_kni_handle_request(p->kni[i]);
+ if (unlikely(num < nb_rx)) {
+ /* Free mbufs not tx to kni interface */
+ kni_burst_free_mbufs(&pkts_burst[num], nb_rx - num);
+ kni_stats[port_id].rx_dropped += nb_rx - num;
+ }
+ }
+}
+
+/**
+ * Interface to dequeue mbufs from tx_q and burst tx
+ */
+static void
+kni_egress(struct kni_port_params *p)
+{
+ uint8_t i, port_id;
+ unsigned nb_tx, num;
+ uint32_t nb_kni;
+ struct rte_mbuf *pkts_burst[PKT_BURST_SZ];
+
+ if (p == NULL)
+ return;
+
+ nb_kni = p->nb_kni;
+ port_id = p->port_id;
+ for (i = 0; i < nb_kni; i++) {
+ /* Burst rx from kni */
+ num = rte_kni_rx_burst(p->kni[i], pkts_burst, PKT_BURST_SZ);
+ if (unlikely(num > PKT_BURST_SZ)) {
+ RTE_LOG(ERR, APP, "Error receiving from KNI\n");
+ return;
+ }
+ /* Burst tx to eth */
+ nb_tx = rte_eth_tx_burst(port_id, 0, pkts_burst, (uint16_t)num);
+ kni_stats[port_id].tx_packets += nb_tx;
+ if (unlikely(nb_tx < num)) {
+ /* Free mbufs not tx to NIC */
+ kni_burst_free_mbufs(&pkts_burst[nb_tx], num - nb_tx);
+ kni_stats[port_id].tx_dropped += num - nb_tx;
+ }
+ }
+}
+
+static int
+main_loop(__rte_unused void *arg)
+{
+ uint8_t i, nb_ports = rte_eth_dev_count();
+ int32_t f_stop;
+ const unsigned lcore_id = rte_lcore_id();
+ enum lcore_rxtx {
+ LCORE_NONE,
+ LCORE_RX,
+ LCORE_TX,
+ LCORE_MAX
+ };
+ enum lcore_rxtx flag = LCORE_NONE;
+
+ nb_ports = (uint8_t)(nb_ports < RTE_MAX_ETHPORTS ?
+ nb_ports : RTE_MAX_ETHPORTS);
+ for (i = 0; i < nb_ports; i++) {
+ if (!kni_port_params_array[i])
+ continue;
+ if (kni_port_params_array[i]->lcore_rx == (uint8_t)lcore_id) {
+ flag = LCORE_RX;
+ break;
+ } else if (kni_port_params_array[i]->lcore_tx ==
+ (uint8_t)lcore_id) {
+ flag = LCORE_TX;
+ break;
+ }
+ }
+
+ if (flag == LCORE_RX) {
+ RTE_LOG(INFO, APP, "Lcore %u is reading from port %d\n",
+ kni_port_params_array[i]->lcore_rx,
+ kni_port_params_array[i]->port_id);
+ while (1) {
+ f_stop = rte_atomic32_read(&kni_stop);
+ if (f_stop)
+ break;
+ kni_ingress(kni_port_params_array[i]);
+ }
+ } else if (flag == LCORE_TX) {
+ RTE_LOG(INFO, APP, "Lcore %u is writing to port %d\n",
+ kni_port_params_array[i]->lcore_tx,
+ kni_port_params_array[i]->port_id);
+ while (1) {
+ f_stop = rte_atomic32_read(&kni_stop);
+ if (f_stop)
+ break;
+ kni_egress(kni_port_params_array[i]);
+ }
+ } else
+ RTE_LOG(INFO, APP, "Lcore %u has nothing to do\n", lcore_id);
+
+ return 0;
+}
+
+/* Display usage instructions */
+static void
+print_usage(const char *prgname)
+{
+ RTE_LOG(INFO, APP, "\nUsage: %s [EAL options] -- -p PORTMASK -P "
+ "[--config (port,lcore_rx,lcore_tx,lcore_kthread...)"
+ "[,(port,lcore_rx,lcore_tx,lcore_kthread...)]]\n"
+ " -p PORTMASK: hex bitmask of ports to use\n"
+ " -P : enable promiscuous mode\n"
+ " --config (port,lcore_rx,lcore_tx,lcore_kthread...): "
+ "port and lcore configurations\n",
+ prgname);
+}
+
+/* Convert a hexadecimal string to an unsigned number; 0 is returned on error */
+static uint32_t
+parse_unsigned(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long num;
+
+ num = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+
+ return (uint32_t)num;
+}
+
+static void
+print_config(void)
+{
+ uint32_t i, j;
+ struct kni_port_params **p = kni_port_params_array;
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (!p[i])
+ continue;
+ RTE_LOG(DEBUG, APP, "Port ID: %d\n", p[i]->port_id);
+ RTE_LOG(DEBUG, APP, "Rx lcore ID: %u, Tx lcore ID: %u\n",
+ p[i]->lcore_rx, p[i]->lcore_tx);
+ for (j = 0; j < p[i]->nb_lcore_k; j++)
+ RTE_LOG(DEBUG, APP, "Kernel thread lcore ID: %u\n",
+ p[i]->lcore_k[j]);
+ }
+}
+
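+/*
+ * Parse the --config option, for example:
+ *   --config="(0,0,1),(1,2,3)"
+ * binds port 0 RX to lcore 0 and TX to lcore 1, and port 1 RX to lcore 2
+ * and TX to lcore 3; any further fields list the lcores used for the
+ * optional KNI kernel threads.
+ */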
+static int
+parse_config(const char *arg)
+{
+ const char *p, *p0 = arg;
+ char s[256], *end;
+ unsigned size;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_LCORE_RX,
+ FLD_LCORE_TX,
+ _NUM_FLD = KNI_MAX_KTHREAD + 3,
+ };
+ int i, j, nb_token;
+ char *str_fld[_NUM_FLD];
+ unsigned long int_fld[_NUM_FLD];
+ uint8_t port_id, nb_kni_port_params = 0;
+
+ memset(&kni_port_params_array, 0, sizeof(kni_port_params_array));
+ while (((p = strchr(p0, '(')) != NULL) &&
+ nb_kni_port_params < RTE_MAX_ETHPORTS) {
+ p++;
+ if ((p0 = strchr(p, ')')) == NULL)
+ goto fail;
+ size = p0 - p;
+ if (size >= sizeof(s)) {
+ printf("Invalid config parameters\n");
+ goto fail;
+ }
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ nb_token = rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',');
+ if (nb_token <= FLD_LCORE_TX) {
+ printf("Invalid config parameters\n");
+ goto fail;
+ }
+ for (i = 0; i < nb_token; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i]) {
+ printf("Invalid config parameters\n");
+ goto fail;
+ }
+ }
+
+ i = 0;
+ port_id = (uint8_t)int_fld[i++];
+ if (port_id >= RTE_MAX_ETHPORTS) {
+			printf("Port ID %d must not exceed the maximum %d\n",
+ port_id, RTE_MAX_ETHPORTS);
+ goto fail;
+ }
+ if (kni_port_params_array[port_id]) {
+ printf("Port %d has been configured\n", port_id);
+ goto fail;
+ }
+ kni_port_params_array[port_id] =
+ rte_zmalloc("KNI_port_params",
+ sizeof(struct kni_port_params), RTE_CACHE_LINE_SIZE);
+ kni_port_params_array[port_id]->port_id = port_id;
+ kni_port_params_array[port_id]->lcore_rx =
+ (uint8_t)int_fld[i++];
+ kni_port_params_array[port_id]->lcore_tx =
+ (uint8_t)int_fld[i++];
+ if (kni_port_params_array[port_id]->lcore_rx >= RTE_MAX_LCORE ||
+ kni_port_params_array[port_id]->lcore_tx >= RTE_MAX_LCORE) {
+			printf("lcore_rx %u or lcore_tx %u ID must not "
+				"exceed the maximum %u\n",
+ kni_port_params_array[port_id]->lcore_rx,
+ kni_port_params_array[port_id]->lcore_tx,
+ (unsigned)RTE_MAX_LCORE);
+ goto fail;
+ }
+ for (j = 0; i < nb_token && j < KNI_MAX_KTHREAD; i++, j++)
+ kni_port_params_array[port_id]->lcore_k[j] =
+ (uint8_t)int_fld[i];
+ kni_port_params_array[port_id]->nb_lcore_k = j;
+ }
+ print_config();
+
+ return 0;
+
+fail:
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (kni_port_params_array[i]) {
+ rte_free(kni_port_params_array[i]);
+ kni_port_params_array[i] = NULL;
+ }
+ }
+
+ return -1;
+}
+
+static int
+validate_parameters(uint32_t portmask)
+{
+ uint32_t i;
+
+ if (!portmask) {
+ printf("No port configured in port mask\n");
+ return -1;
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (((portmask & (1 << i)) && !kni_port_params_array[i]) ||
+ (!(portmask & (1 << i)) && kni_port_params_array[i]))
+			rte_exit(EXIT_FAILURE, "portmask is not consistent "
+				"with the port ids specified in --config\n");
+
+ if (kni_port_params_array[i] && !rte_lcore_is_enabled(\
+ (unsigned)(kni_port_params_array[i]->lcore_rx)))
+ rte_exit(EXIT_FAILURE, "lcore id %u for "
+ "port %d receiving not enabled\n",
+ kni_port_params_array[i]->lcore_rx,
+ kni_port_params_array[i]->port_id);
+
+ if (kni_port_params_array[i] && !rte_lcore_is_enabled(\
+ (unsigned)(kni_port_params_array[i]->lcore_tx)))
+ rte_exit(EXIT_FAILURE, "lcore id %u for "
+ "port %d transmitting not enabled\n",
+ kni_port_params_array[i]->lcore_tx,
+ kni_port_params_array[i]->port_id);
+
+ }
+
+ return 0;
+}
+
+#define CMDLINE_OPT_CONFIG "config"
+
+/* Parse the arguments given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, longindex, ret = 0;
+ const char *prgname = argv[0];
+ static struct option longopts[] = {
+ {CMDLINE_OPT_CONFIG, required_argument, NULL, 0},
+ {NULL, 0, NULL, 0}
+ };
+
+ /* Disable printing messages within getopt() */
+ opterr = 0;
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:P", longopts,
+ &longindex)) != EOF) {
+ switch (opt) {
+ case 'p':
+ ports_mask = parse_unsigned(optarg);
+ break;
+ case 'P':
+ promiscuous_on = 1;
+ break;
+ case 0:
+ if (!strncmp(longopts[longindex].name,
+ CMDLINE_OPT_CONFIG,
+ sizeof(CMDLINE_OPT_CONFIG))) {
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("Invalid config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+ break;
+ default:
+ print_usage(prgname);
+ rte_exit(EXIT_FAILURE, "Invalid option specified\n");
+ }
+ }
+
+ /* Check that options were parsed ok */
+ if (validate_parameters(ports_mask) < 0) {
+ print_usage(prgname);
+ rte_exit(EXIT_FAILURE, "Invalid parameters\n");
+ }
+
+ return ret;
+}
+
+/* Initialize KNI subsystem */
+static void
+init_kni(void)
+{
+ unsigned int num_of_kni_ports = 0, i;
+ struct kni_port_params **params = kni_port_params_array;
+
+ /* Calculate the maximum number of KNI interfaces that will be used */
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (kni_port_params_array[i]) {
+ num_of_kni_ports += (params[i]->nb_lcore_k ?
+ params[i]->nb_lcore_k : 1);
+ }
+ }
+
+ /* Invoke rte KNI init to preallocate the ports */
+ rte_kni_init(num_of_kni_ports);
+}
+
+/* Initialise a single port on an Ethernet device */
+static void
+init_port(uint8_t port)
+{
+ int ret;
+
+ /* Initialise device and RX/TX queues */
+ RTE_LOG(INFO, APP, "Initialising port %u ...\n", (unsigned)port);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(port, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Could not configure port%u (%d)\n",
+ (unsigned)port, ret);
+
+ ret = rte_eth_rx_queue_setup(port, 0, NB_RXD,
+ rte_eth_dev_socket_id(port), NULL, pktmbuf_pool);
+ if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Could not set up RX queue for "
+ "port%u (%d)\n", (unsigned)port, ret);
+
+ ret = rte_eth_tx_queue_setup(port, 0, NB_TXD,
+ rte_eth_dev_socket_id(port), NULL);
+ if (ret < 0)
+		rte_exit(EXIT_FAILURE, "Could not set up TX queue for "
+ "port%u (%d)\n", (unsigned)port, ret);
+
+ ret = rte_eth_dev_start(port);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Could not start port%u (%d)\n",
+ (unsigned)port, ret);
+
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(port);
+}
+
+/* Check the link status of all ports for up to 9 s, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status\n");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+				("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/* Callback for request of changing MTU */
+static int
+kni_change_mtu(uint8_t port_id, unsigned new_mtu)
+{
+ int ret;
+ struct rte_eth_conf conf;
+
+ if (port_id >= rte_eth_dev_count()) {
+ RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
+ return -EINVAL;
+ }
+
+ RTE_LOG(INFO, APP, "Change MTU of port %d to %u\n", port_id, new_mtu);
+
+ /* Stop specific port */
+ rte_eth_dev_stop(port_id);
+
+ memcpy(&conf, &port_conf, sizeof(conf));
+ /* Set new MTU */
+ if (new_mtu > ETHER_MAX_LEN)
+ conf.rxmode.jumbo_frame = 1;
+ else
+ conf.rxmode.jumbo_frame = 0;
+
+ /* mtu + length of header + length of FCS = max pkt length */
+ conf.rxmode.max_rx_pkt_len = new_mtu + KNI_ENET_HEADER_SIZE +
+ KNI_ENET_FCS_SIZE;
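+	/* For example, a 1500 byte MTU gives 1500 + 14 + 4 = 1518 (ETHER_MAX_LEN). */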
+ ret = rte_eth_dev_configure(port_id, 1, 1, &conf);
+ if (ret < 0) {
+		RTE_LOG(ERR, APP, "Failed to reconfigure port %d\n", port_id);
+ return ret;
+ }
+
+ /* Restart specific port */
+ ret = rte_eth_dev_start(port_id);
+ if (ret < 0) {
+		RTE_LOG(ERR, APP, "Failed to restart port %d\n", port_id);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* Callback for request of configuring network interface up/down */
+static int
+kni_config_network_interface(uint8_t port_id, uint8_t if_up)
+{
+ int ret = 0;
+
+ if (port_id >= rte_eth_dev_count() || port_id >= RTE_MAX_ETHPORTS) {
+ RTE_LOG(ERR, APP, "Invalid port id %d\n", port_id);
+ return -EINVAL;
+ }
+
+ RTE_LOG(INFO, APP, "Configure network interface of %d %s\n",
+ port_id, if_up ? "up" : "down");
+
+ if (if_up != 0) { /* Configure network interface up */
+ rte_eth_dev_stop(port_id);
+ ret = rte_eth_dev_start(port_id);
+ } else /* Configure network interface down */
+ rte_eth_dev_stop(port_id);
+
+ if (ret < 0)
+ RTE_LOG(ERR, APP, "Failed to start port %d\n", port_id);
+
+ return ret;
+}
+
+static int
+kni_alloc(uint8_t port_id)
+{
+ uint8_t i;
+ struct rte_kni *kni;
+ struct rte_kni_conf conf;
+ struct kni_port_params **params = kni_port_params_array;
+
+ if (port_id >= RTE_MAX_ETHPORTS || !params[port_id])
+ return -1;
+
+ params[port_id]->nb_kni = params[port_id]->nb_lcore_k ?
+ params[port_id]->nb_lcore_k : 1;
+
+ for (i = 0; i < params[port_id]->nb_kni; i++) {
+ /* Clear conf at first */
+ memset(&conf, 0, sizeof(conf));
+ if (params[port_id]->nb_lcore_k) {
+ snprintf(conf.name, RTE_KNI_NAMESIZE,
+ "vEth%u_%u", port_id, i);
+ conf.core_id = params[port_id]->lcore_k[i];
+ conf.force_bind = 1;
+ } else
+ snprintf(conf.name, RTE_KNI_NAMESIZE,
+ "vEth%u", port_id);
+ conf.group_id = (uint16_t)port_id;
+ conf.mbuf_size = MAX_PACKET_SZ;
+ /*
+		 * In a multiple kernel thread environment, the first
+		 * KNI device associated with a port is the master and
+		 * registers the request callbacks.
+ */
+ if (i == 0) {
+ struct rte_kni_ops ops;
+ struct rte_eth_dev_info dev_info;
+
+ memset(&dev_info, 0, sizeof(dev_info));
+ rte_eth_dev_info_get(port_id, &dev_info);
+ conf.addr = dev_info.pci_dev->addr;
+ conf.id = dev_info.pci_dev->id;
+
+ memset(&ops, 0, sizeof(ops));
+ ops.port_id = port_id;
+ ops.change_mtu = kni_change_mtu;
+ ops.config_network_if = kni_config_network_interface;
+
+ kni = rte_kni_alloc(pktmbuf_pool, &conf, &ops);
+ } else
+ kni = rte_kni_alloc(pktmbuf_pool, &conf, NULL);
+
+ if (!kni)
+ rte_exit(EXIT_FAILURE, "Fail to create kni for "
+ "port: %d\n", port_id);
+ params[port_id]->kni[i] = kni;
+ }
+
+ return 0;
+}
+
+static int
+kni_free_kni(uint8_t port_id)
+{
+ uint8_t i;
+ struct kni_port_params **p = kni_port_params_array;
+
+ if (port_id >= RTE_MAX_ETHPORTS || !p[port_id])
+ return -1;
+
+ for (i = 0; i < p[port_id]->nb_kni; i++) {
+ rte_kni_release(p[port_id]->kni[i]);
+ p[port_id]->kni[i] = NULL;
+ }
+ rte_eth_dev_stop(port_id);
+
+ return 0;
+}
+
+/* Initialise ports/queues etc. and start main loop on each core */
+int
+main(int argc, char** argv)
+{
+ int ret;
+ uint8_t nb_sys_ports, port;
+ unsigned i;
+
+	/* Associate signal_handler function with USR, RTMIN and INT signals */
+ signal(SIGUSR1, signal_handler);
+ signal(SIGUSR2, signal_handler);
+ signal(SIGRTMIN, signal_handler);
+ signal(SIGINT, signal_handler);
+
+ /* Initialise EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Could not initialise EAL (%d)\n", ret);
+ argc -= ret;
+ argv += ret;
+
+ /* Parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Could not parse input parameters\n");
+
+ /* Create the mbuf pool */
+ pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF,
+ MEMPOOL_CACHE_SZ, 0, MBUF_DATA_SZ, rte_socket_id());
+ if (pktmbuf_pool == NULL) {
+ rte_exit(EXIT_FAILURE, "Could not initialise mbuf pool\n");
+ return -1;
+ }
+
+ /* Get number of ports found in scan */
+ nb_sys_ports = rte_eth_dev_count();
+ if (nb_sys_ports == 0)
+ rte_exit(EXIT_FAILURE, "No supported Ethernet device found\n");
+
+ /* Check if the configured port ID is valid */
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++)
+ if (kni_port_params_array[i] && i >= nb_sys_ports)
+ rte_exit(EXIT_FAILURE, "Configured invalid "
+ "port ID %u\n", i);
+
+ /* Initialize KNI subsystem */
+ init_kni();
+
+ /* Initialise each port */
+ for (port = 0; port < nb_sys_ports; port++) {
+ /* Skip ports that are not enabled */
+ if (!(ports_mask & (1 << port)))
+ continue;
+ init_port(port);
+
+ if (port >= RTE_MAX_ETHPORTS)
+ rte_exit(EXIT_FAILURE, "Can not use more than "
+ "%d ports for kni\n", RTE_MAX_ETHPORTS);
+
+ kni_alloc(port);
+ }
+ check_all_ports_link_status(nb_sys_ports, ports_mask);
+
+ /* Launch per-lcore function on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ if (rte_eal_wait_lcore(i) < 0)
+ return -1;
+ }
+
+ /* Release resources */
+ for (port = 0; port < nb_sys_ports; port++) {
+ if (!(ports_mask & (1 << port)))
+ continue;
+ kni_free_kni(port);
+ }
+#ifdef RTE_LIBRTE_XEN_DOM0
+ rte_kni_close();
+#endif
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++)
+ if (kni_port_params_array[i]) {
+ rte_free(kni_port_params_array[i]);
+ kni_port_params_array[i] = NULL;
+ }
+
+ return 0;
+}
diff --git a/examples/l2fwd-cat/Makefile b/examples/l2fwd-cat/Makefile
new file mode 100644
index 00000000..ae921ade
--- /dev/null
+++ b/examples/l2fwd-cat/Makefile
@@ -0,0 +1,70 @@
+# BSD LICENSE
+#
+# Copyright(c) 2016 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+ifeq ($(PQOS_INSTALL_PATH),)
+$(error "Please define PQOS_INSTALL_PATH environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+# Location of PQoS library and includes
+PQOS_LIBRARY_PATH = $(PQOS_INSTALL_PATH)/libpqos.a
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd-cat
+
+# all source are stored in SRCS-y
+SRCS-y := l2fwd-cat.c cat.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+CFLAGS += -I$(PQOS_INSTALL_PATH)/../include
+CFLAGS_cat.o := -D_GNU_SOURCE
+
+LDLIBS += -L$(PQOS_INSTALL_PATH)
+LDLIBS += $(PQOS_LIBRARY_PATH)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-cat/cat.c b/examples/l2fwd-cat/cat.c
new file mode 100644
index 00000000..bad39305
--- /dev/null
+++ b/examples/l2fwd-cat/cat.c
@@ -0,0 +1,996 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <getopt.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+
+#include <rte_common.h>
+#include <rte_memcpy.h>
+
+#include <pqos.h>
+
+#include "cat.h"
+
+#define BITS_PER_HEX 4
+#define PQOS_MAX_SOCKETS 8
+#define PQOS_MAX_SOCKET_CORES 64
+#define PQOS_MAX_CORES (PQOS_MAX_SOCKET_CORES * PQOS_MAX_SOCKETS)
+
+static const struct pqos_cap *m_cap;
+static const struct pqos_cpuinfo *m_cpu;
+static const struct pqos_capability *m_cap_l3ca;
+static unsigned m_sockets[PQOS_MAX_SOCKETS];
+static unsigned m_sock_count;
+static struct cat_config m_config[PQOS_MAX_CORES];
+static unsigned m_config_count;
+
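+/* Count the set bits by repeatedly clearing the lowest set bit
+ * (Kernighan's method); e.g. 0xF0 -> 4.
+ */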
+static unsigned
+bits_count(uint64_t bitmask)
+{
+ unsigned count = 0;
+
+ for (; bitmask != 0; count++)
+ bitmask &= bitmask - 1;
+
+ return count;
+}
+
+/*
+ * Parse elem; the elem can be a single number/range or a '(' ')' group:
+ * 1) A single number elem is just a simple digit, e.g. 9
+ * 2) A single range elem is two digits with a '-' between them, e.g. 2-6
+ * 3) A group elem combines multiple 1) or 2) with '(' ')', e.g. (0,2-4,6)
+ *    Within a group elem, '-' is used as a range separator and
+ *    ',' separates single numbers.
+ */
+static int
+parse_set(const char *input, rte_cpuset_t *cpusetp)
+{
+ unsigned idx;
+ const char *str = input;
+ char *end = NULL;
+ unsigned min, max;
+ const unsigned num = PQOS_MAX_CORES;
+
+ CPU_ZERO(cpusetp);
+
+ while (isblank(*str))
+ str++;
+
+	/* only a digit or a left bracket qualifies as a start point */
+ if ((!isdigit(*str) && *str != '(') || *str == '\0')
+ return -1;
+
+	/* process a single number or a single range of numbers */
+ if (*str != '(') {
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+
+ if (errno || end == NULL || idx >= num)
+ return -1;
+
+ while (isblank(*end))
+ end++;
+
+ min = idx;
+ max = idx;
+ if (*end == '-') {
+ /* process single <number>-<number> */
+ end++;
+ while (isblank(*end))
+ end++;
+ if (!isdigit(*end))
+ return -1;
+
+ errno = 0;
+ idx = strtoul(end, &end, 10);
+ if (errno || end == NULL || idx >= num)
+ return -1;
+ max = idx;
+ while (isblank(*end))
+ end++;
+ if (*end != ',' && *end != '\0')
+ return -1;
+ }
+
+ if (*end != ',' && *end != '\0' && *end != '@')
+ return -1;
+
+ for (idx = RTE_MIN(min, max); idx <= RTE_MAX(min, max);
+ idx++)
+ CPU_SET(idx, cpusetp);
+
+ return end - input;
+ }
+
+ /* process set within bracket */
+ str++;
+ while (isblank(*str))
+ str++;
+ if (*str == '\0')
+ return -1;
+
+ min = PQOS_MAX_CORES;
+ do {
+
+		/* advance to the first digit */
+ while (isblank(*str))
+ str++;
+ if (!isdigit(*str))
+ return -1;
+
+ /* get the digit value */
+ errno = 0;
+ idx = strtoul(str, &end, 10);
+ if (errno || end == NULL || idx >= num)
+ return -1;
+
+		/* advance to a separator: '-', ',' or ')' */
+ while (isblank(*end))
+ end++;
+ if (*end == '-') {
+ if (min == PQOS_MAX_CORES)
+ min = idx;
+ else /* avoid continuous '-' */
+ return -1;
+ } else if ((*end == ',') || (*end == ')')) {
+ max = idx;
+ if (min == PQOS_MAX_CORES)
+ min = idx;
+ for (idx = RTE_MIN(min, max); idx <= RTE_MAX(min, max);
+ idx++)
+ CPU_SET(idx, cpusetp);
+
+ min = PQOS_MAX_CORES;
+ } else
+ return -1;
+
+ str = end + 1;
+ } while (*end != '\0' && *end != ')');
+
+ return str - input;
+}
+
+/* Test if bitmask is contiguous */
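+/* e.g. 0x00F0 is contiguous, 0x0101 is not */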
+static int
+is_contiguous(uint64_t bitmask)
+{
+	/* count the first contiguous run of set bits, then compare with total */
+ unsigned i = 0;
+ unsigned j = 0;
+ const unsigned max_idx = (sizeof(bitmask) * CHAR_BIT);
+
+ if (bitmask == 0)
+ return 0;
+
+ for (i = 0; i < max_idx; i++) {
+ if (((1ULL << i) & bitmask) != 0)
+ j++;
+ else if (j > 0)
+ break;
+ }
+
+ if (bits_count(bitmask) != j) {
+ printf("PQOS: mask 0x%llx is not contiguous.\n",
+ (unsigned long long)bitmask);
+ return 0;
+ }
+
+ return 1;
+}
+
+/*
+ * The format pattern: --l3ca='<cbm@cpus>[,<(ccbm,dcbm)@cpus>...]'
+ * cbm can be a single mask or, for a CDP enabled system, a group of two masks
+ * ("code cbm" and "data cbm")
+ * '(' and ')' are necessary if it's a group.
+ * cpus can be a single digit/range or a group.
+ * '(' and ')' are necessary if it's a group.
+ *
+ * e.g. '0x00F00@(1,3), 0x0FF00@(4-6), 0xF0000@7'
+ * - CPUs 1 and 3 share their 4 ways with CPUs 4, 5 and 6;
+ * - CPUs 4, 5 and 6 share half (4 out of 8 ways) of their L3 with CPUs 1 and 3;
+ * - CPUs 4, 5 and 6 have exclusive access to 4 out of 8 ways;
+ * - CPU 7 has exclusive access to all of its 4 ways;
+ *
+ * e.g. '(0x00C00,0x00300)@(1,3)' for a CDP enabled system
+ * - CPUs 1 and 3 have access to 2 ways for code and 2 ways for data,
+ *   code and data ways are not overlapping.
+ */
+static int
+parse_l3ca(const char *l3ca)
+{
+ unsigned idx = 0;
+ const char *cbm_start = NULL;
+ char *cbm_end = NULL;
+ const char *end = NULL;
+ int offset;
+ rte_cpuset_t cpuset;
+ uint64_t mask = 0;
+ uint64_t cmask = 0;
+
+ if (l3ca == NULL)
+ goto err;
+
+ /* Get cbm */
+ do {
+ CPU_ZERO(&cpuset);
+ mask = 0;
+ cmask = 0;
+
+ while (isblank(*l3ca))
+ l3ca++;
+
+ if (*l3ca == '\0')
+ goto err;
+
+ /* record mask_set start point */
+ cbm_start = l3ca;
+
+		/* skip over a complete bracketed group */
+ if (*cbm_start == '(') {
+ l3ca += strcspn(l3ca, ")");
+ if (*l3ca++ == '\0')
+ goto err;
+ }
+
+		/* scan for the separator: '@', ',' (next entry) or '\0' (finish) */
+ l3ca += strcspn(l3ca, "@,");
+
+ if (*l3ca == '@') {
+			/* explicitly assign cpu_set */
+ offset = parse_set(l3ca + 1, &cpuset);
+ if (offset < 0 || CPU_COUNT(&cpuset) == 0)
+ goto err;
+
+ end = l3ca + 1 + offset;
+ } else
+ goto err;
+
+ if (*end != ',' && *end != '\0')
+ goto err;
+
+ /* parse mask_set from start point */
+ if (*cbm_start == '(') {
+ cbm_start++;
+
+ while (isblank(*cbm_start))
+ cbm_start++;
+
+ if (!isxdigit(*cbm_start))
+ goto err;
+
+ errno = 0;
+ cmask = strtoul(cbm_start, &cbm_end, 16);
+ if (errno != 0 || cbm_end == NULL || cmask == 0)
+ goto err;
+
+ while (isblank(*cbm_end))
+ cbm_end++;
+
+ if (*cbm_end != ',')
+ goto err;
+
+ cbm_end++;
+
+ while (isblank(*cbm_end))
+ cbm_end++;
+
+ if (!isxdigit(*cbm_end))
+ goto err;
+
+ errno = 0;
+ mask = strtoul(cbm_end, &cbm_end, 16);
+ if (errno != 0 || cbm_end == NULL || mask == 0)
+ goto err;
+ } else {
+ while (isblank(*cbm_start))
+ cbm_start++;
+
+ if (!isxdigit(*cbm_start))
+ goto err;
+
+ errno = 0;
+ mask = strtoul(cbm_start, &cbm_end, 16);
+ if (errno != 0 || cbm_end == NULL || mask == 0)
+ goto err;
+
+ }
+
+ if (mask == 0 || is_contiguous(mask) == 0)
+ goto err;
+
+ if (cmask != 0 && is_contiguous(cmask) == 0)
+ goto err;
+
+ rte_memcpy(&m_config[idx].cpumask,
+ &cpuset, sizeof(rte_cpuset_t));
+
+ if (cmask != 0) {
+ m_config[idx].cdp = 1;
+ m_config[idx].code_mask = cmask;
+ m_config[idx].data_mask = mask;
+ } else
+ m_config[idx].mask = mask;
+
+ m_config_count++;
+
+ l3ca = end + 1;
+ idx++;
+ } while (*end != '\0' && idx < PQOS_MAX_CORES);
+
+ if (m_config_count == 0)
+ goto err;
+
+ return 0;
+
+err:
+ return -EINVAL;
+}
+
+static int
+check_cpus_overlapping(void)
+{
+ unsigned i = 0;
+ unsigned j = 0;
+ rte_cpuset_t mask;
+
+ CPU_ZERO(&mask);
+
+ for (i = 0; i < m_config_count; i++) {
+ for (j = i + 1; j < m_config_count; j++) {
+ CPU_AND(&mask,
+ &m_config[i].cpumask,
+ &m_config[j].cpumask);
+
+ if (CPU_COUNT(&mask) != 0) {
+ printf("PQOS: Requested CPUs sets are "
+ "overlapping.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int
+check_cpus(void)
+{
+ unsigned i = 0;
+ unsigned cpu_id = 0;
+ unsigned cos_id = 0;
+ int ret = 0;
+
+ for (i = 0; i < m_config_count; i++) {
+ for (cpu_id = 0; cpu_id < PQOS_MAX_CORES; cpu_id++) {
+ if (CPU_ISSET(cpu_id, &m_config[i].cpumask) != 0) {
+
+ ret = pqos_cpu_check_core(m_cpu, cpu_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: %u is not a valid "
+ "logical core id.\n", cpu_id);
+ ret = -ENODEV;
+ goto exit;
+ }
+
+ ret = pqos_l3ca_assoc_get(cpu_id, &cos_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to read COS "
+ "associated to cpu %u.\n",
+ cpu_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ /*
+				 * Check if the COS assigned to the lcore is
+				 * different than the default one (#0)
+ */
+ if (cos_id != 0) {
+ printf("PQOS: cpu %u has already "
+ "associated COS#%u. "
+ "Please reset L3CA.\n",
+ cpu_id, cos_id);
+ ret = -EBUSY;
+ goto exit;
+ }
+ }
+ }
+ }
+
+exit:
+ return ret;
+}
+
+static int
+check_cdp(void)
+{
+ unsigned i = 0;
+
+ for (i = 0; i < m_config_count; i++) {
+ if (m_config[i].cdp == 1 && m_cap_l3ca->u.l3ca->cdp_on == 0) {
+ if (m_cap_l3ca->u.l3ca->cdp == 0) {
+ printf("PQOS: CDP requested but not "
+ "supported.\n");
+ } else {
+ printf("PQOS: CDP requested but not enabled. "
+ "Please enable CDP.\n");
+ }
+ return -ENOTSUP;
+ }
+ }
+
+ return 0;
+}
+
+static int
+check_cbm_len_and_contention(void)
+{
+ unsigned i = 0;
+ uint64_t mask = 0;
+ const uint64_t not_cbm = (UINT64_MAX << (m_cap_l3ca->u.l3ca->num_ways));
+ const uint64_t cbm_contention_mask = m_cap_l3ca->u.l3ca->way_contention;
+ int ret = 0;
+
+ for (i = 0; i < m_config_count; i++) {
+ if (m_config[i].cdp == 1)
+ mask = m_config[i].code_mask | m_config[i].data_mask;
+ else
+ mask = m_config[i].mask;
+
+ if ((mask & not_cbm) != 0) {
+ printf("PQOS: One or more of requested CBM masks not "
+ "supported by system (too long).\n");
+ ret = -ENOTSUP;
+ break;
+ }
+
+ /* Just a warning */
+ if ((mask & cbm_contention_mask) != 0) {
+ printf("PQOS: One or more of requested CBM masks "
+ "overlap CBM contention mask.\n");
+ break;
+ }
+
+ }
+
+ return ret;
+}
+
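+/*
+ * For each config group, select an unused COS on every socket that hosts one
+ * of the group's CPUs: first mark the COS currently associated with any core
+ * as used, then record the first free COS per (group, socket) in cos_id_map.
+ * Returns -E2BIG when no free COS is left on a socket.
+ */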
+static int
+check_and_select_classes(unsigned cos_id_map[][PQOS_MAX_SOCKETS])
+{
+ unsigned i = 0;
+ unsigned j = 0;
+ unsigned phy_pkg_id = 0;
+ unsigned cos_id = 0;
+ unsigned cpu_id = 0;
+ unsigned phy_pkg_lcores[PQOS_MAX_SOCKETS][m_config_count];
+ const unsigned cos_num = m_cap_l3ca->u.l3ca->num_classes;
+ unsigned used_cos_table[PQOS_MAX_SOCKETS][cos_num];
+ int ret = 0;
+
+ memset(phy_pkg_lcores, 0, sizeof(phy_pkg_lcores));
+ memset(used_cos_table, 0, sizeof(used_cos_table));
+
+ /* detect currently used COS */
+ for (j = 0; j < m_cpu->num_cores; j++) {
+ cpu_id = m_cpu->cores[j].lcore;
+
+ ret = pqos_l3ca_assoc_get(cpu_id, &cos_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to read COS associated to "
+ "cpu %u on phy_pkg %u.\n", cpu_id, phy_pkg_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to get socket for cpu %u\n",
+ cpu_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ /* Mark COS as used */
+ if (used_cos_table[phy_pkg_id][cos_id] == 0)
+ used_cos_table[phy_pkg_id][cos_id]++;
+ }
+
+ /* look for avail. COS to fulfill requested config */
+ for (i = 0; i < m_config_count; i++) {
+ for (j = 0; j < m_cpu->num_cores; j++) {
+ cpu_id = m_cpu->cores[j].lcore;
+ if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0)
+ continue;
+
+ ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to get socket for "
+ "cpu %u\n", cpu_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ /*
+ * Check if we already have COS selected
+ * to be used for that group on that socket
+ */
+ if (phy_pkg_lcores[phy_pkg_id][i] != 0)
+ continue;
+
+ phy_pkg_lcores[phy_pkg_id][i]++;
+
+ /* Search for avail. COS to be used on that socket */
+ for (cos_id = 0; cos_id < cos_num; cos_id++) {
+ if (used_cos_table[phy_pkg_id][cos_id] == 0) {
+ used_cos_table[phy_pkg_id][cos_id]++;
+ cos_id_map[i][phy_pkg_id] = cos_id;
+ break;
+ }
+ }
+
+ /* If there is no COS available ...*/
+ if (cos_id == cos_num) {
+ ret = -E2BIG;
+ goto exit;
+ }
+ }
+ }
+
+exit:
+ if (ret != 0)
+ printf("PQOS: Not enough available COS to configure "
+ "requested configuration.\n");
+
+ return ret;
+}
+
+static int
+configure_cat(unsigned cos_id_map[][PQOS_MAX_SOCKETS])
+{
+ unsigned phy_pkg_id = 0;
+ unsigned cpu_id = 0;
+ unsigned cos_id = 0;
+ unsigned i = 0;
+ unsigned j = 0;
+ struct pqos_l3ca l3ca = {0};
+ int ret = 0;
+
+ for (i = 0; i < m_config_count; i++) {
+ memset(&l3ca, 0, sizeof(l3ca));
+
+ l3ca.cdp = m_config[i].cdp;
+ if (m_config[i].cdp == 1) {
+ l3ca.code_mask = m_config[i].code_mask;
+ l3ca.data_mask = m_config[i].data_mask;
+ } else
+ l3ca.ways_mask = m_config[i].mask;
+
+ for (j = 0; j < m_sock_count; j++) {
+ phy_pkg_id = m_sockets[j];
+ if (cos_id_map[i][phy_pkg_id] == 0)
+ continue;
+
+ l3ca.class_id = cos_id_map[i][phy_pkg_id];
+
+ ret = pqos_l3ca_set(phy_pkg_id, 1, &l3ca);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to set COS %u on "
+ "phy_pkg %u.\n", l3ca.class_id,
+ phy_pkg_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+ }
+ }
+
+ for (i = 0; i < m_config_count; i++) {
+ for (j = 0; j < m_cpu->num_cores; j++) {
+ cpu_id = m_cpu->cores[j].lcore;
+ if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0)
+ continue;
+
+ ret = pqos_cpu_get_socketid(m_cpu, cpu_id, &phy_pkg_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to get socket for "
+ "cpu %u\n", cpu_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+
+ cos_id = cos_id_map[i][phy_pkg_id];
+
+ ret = pqos_l3ca_assoc_set(cpu_id, cos_id);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to associate COS %u to "
+ "cpu %u\n", cos_id, cpu_id);
+ ret = -EFAULT;
+ goto exit;
+ }
+ }
+ }
+
+exit:
+ return ret;
+}
+
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt = 0;
+ int retval = 0;
+ int oldopterr = 0;
+ char **argvopt = argv;
+ char *prgname = argv[0];
+
+ static struct option lgopts[] = {
+ { "l3ca", required_argument, 0, 0 },
+ { NULL, 0, 0, 0 }
+ };
+
+ /* Disable printing messages within getopt() */
+ oldopterr = opterr;
+ opterr = 0;
+
+ opt = getopt_long(argc, argvopt, "", lgopts, NULL);
+ if (opt == 0) {
+ retval = parse_l3ca(optarg);
+ if (retval != 0) {
+ printf("PQOS: Invalid L3CA parameters!\n");
+ goto exit;
+ }
+
+ argv[optind - 1] = prgname;
+ retval = optind - 1;
+ } else
+ retval = 0;
+
+exit:
+ /* reset getopt lib */
+ optind = 0;
+
+ /* Restore opterr value */
+ opterr = oldopterr;
+
+ return retval;
+}
+
+static void
+print_cmd_line_config(void)
+{
+ char cpustr[PQOS_MAX_CORES * 3] = {0};
+ unsigned i = 0;
+ unsigned j = 0;
+
+ for (i = 0; i < m_config_count; i++) {
+ unsigned len = 0;
+ memset(cpustr, 0, sizeof(cpustr));
+
+ /* Generate CPU list */
+ for (j = 0; j < PQOS_MAX_CORES; j++) {
+ if (CPU_ISSET(j, &m_config[i].cpumask) != 1)
+ continue;
+
+ len += snprintf(cpustr + len, sizeof(cpustr) - len - 1,
+ "%u,", j);
+
+ if (len >= sizeof(cpustr) - 1)
+ break;
+ }
+
+ if (m_config[i].cdp == 1) {
+ printf("PQOS: CPUs: %s cMASK: 0x%llx, dMASK: "
+ "0x%llx\n", cpustr,
+ (unsigned long long)m_config[i].code_mask,
+ (unsigned long long)m_config[i].data_mask);
+ } else {
+ printf("PQOS: CPUs: %s MASK: 0x%llx\n", cpustr,
+ (unsigned long long)m_config[i].mask);
+ }
+ }
+}
+
+/**
+ * @brief Prints CAT configuration
+ */
+static void
+print_cat_config(void)
+{
+ int ret = PQOS_RETVAL_OK;
+ unsigned i = 0;
+
+ for (i = 0; i < m_sock_count; i++) {
+ struct pqos_l3ca tab[PQOS_MAX_L3CA_COS] = {{0} };
+ unsigned num = 0;
+ unsigned n = 0;
+
+ ret = pqos_l3ca_get(m_sockets[i], PQOS_MAX_L3CA_COS, &num, tab);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Error retrieving COS!\n");
+ return;
+ }
+
+ printf("PQOS: COS definitions for Socket %u:\n", m_sockets[i]);
+ for (n = 0; n < num; n++) {
+ if (tab[n].cdp == 1) {
+ printf("PQOS: COS: %u, cMASK: 0x%llx, "
+ "dMASK: 0x%llx\n", tab[n].class_id,
+ (unsigned long long)tab[n].code_mask,
+ (unsigned long long)tab[n].data_mask);
+ } else {
+ printf("PQOS: COS: %u, MASK: 0x%llx\n",
+ tab[n].class_id,
+ (unsigned long long)tab[n].ways_mask);
+ }
+ }
+ }
+
+ for (i = 0; i < m_sock_count; i++) {
+ unsigned lcores[PQOS_MAX_SOCKET_CORES] = {0};
+ unsigned lcount = 0;
+ unsigned n = 0;
+
+ ret = pqos_cpu_get_cores(m_cpu, m_sockets[i],
+ PQOS_MAX_SOCKET_CORES, &lcount, &lcores[0]);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Error retrieving core information!\n");
+ return;
+ }
+
+ printf("PQOS: CPU information for socket %u:\n", m_sockets[i]);
+ for (n = 0; n < lcount; n++) {
+ unsigned class_id = 0;
+
+ ret = pqos_l3ca_assoc_get(lcores[n], &class_id);
+ if (ret == PQOS_RETVAL_OK)
+ printf("PQOS: CPU: %u, COS: %u\n", lcores[n],
+ class_id);
+ else
+ printf("PQOS: CPU: %u, ERROR\n", lcores[n]);
+ }
+ }
+
+}
+
+static int
+cat_validate(void)
+{
+ int ret = 0;
+
+ ret = check_cpus();
+ if (ret != 0)
+ return ret;
+
+ ret = check_cdp();
+ if (ret != 0)
+ return ret;
+
+ ret = check_cbm_len_and_contention();
+ if (ret != 0)
+ return ret;
+
+ ret = check_cpus_overlapping();
+ if (ret != 0)
+ return ret;
+
+ return 0;
+}
+
+static int
+cat_set(void)
+{
+ int ret = 0;
+ unsigned cos_id_map[m_config_count][PQOS_MAX_SOCKETS];
+
+ memset(cos_id_map, 0, sizeof(cos_id_map));
+
+ ret = check_and_select_classes(cos_id_map);
+ if (ret != 0)
+ return ret;
+
+ ret = configure_cat(cos_id_map);
+ if (ret != 0)
+ return ret;
+
+ return 0;
+}
+
+static void
+cat_fini(void)
+{
+ int ret = 0;
+
+ printf("PQOS: Shutting down PQoS library...\n");
+
+ /* deallocate all the resources */
+ ret = pqos_fini();
+ if (ret != PQOS_RETVAL_OK && ret != PQOS_RETVAL_INIT)
+ printf("PQOS: Error shutting down PQoS library!\n");
+
+ m_cap = NULL;
+ m_cpu = NULL;
+ m_cap_l3ca = NULL;
+ memset(m_sockets, 0, sizeof(m_sockets));
+ m_sock_count = 0;
+ memset(m_config, 0, sizeof(m_config));
+ m_config_count = 0;
+}
+
+void
+cat_exit(void)
+{
+ unsigned i = 0;
+ unsigned j = 0;
+ unsigned cpu_id = 0;
+ int ret = 0;
+
+ /* if lib is not initialized, do nothing */
+ if (m_cap == NULL && m_cpu == NULL)
+ return;
+
+ printf("PQOS: Reverting CAT configuration...\n");
+
+ for (i = 0; i < m_config_count; i++) {
+ for (j = 0; j < m_cpu->num_cores; j++) {
+ cpu_id = m_cpu->cores[j].lcore;
+ if (CPU_ISSET(cpu_id, &m_config[i].cpumask) == 0)
+ continue;
+
+ ret = pqos_l3ca_assoc_set(cpu_id, 0);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Failed to associate COS 0 to "
+ "cpu %u\n", cpu_id);
+ }
+ }
+ }
+
+ cat_fini();
+}
+
+static void
+signal_handler(int signum)
+{
+ if (signum == SIGINT || signum == SIGTERM) {
+ printf("\nPQOS: Signal %d received, preparing to exit...\n",
+ signum);
+
+ cat_exit();
+
+ /* exit with the expected status */
+ signal(signum, SIG_DFL);
+ kill(getpid(), signum);
+ }
+}
+
+int
+cat_init(int argc, char **argv)
+{
+ int ret = 0;
+ int args_num = 0;
+ struct pqos_config cfg = {0};
+
+ if (m_cap != NULL || m_cpu != NULL) {
+ printf("PQOS: CAT module already initialized!\n");
+ return -EEXIST;
+ }
+
+ /* Parse cmd line args */
+ ret = parse_args(argc, argv);
+
+ if (ret <= 0)
+ goto err;
+
+ args_num = ret;
+
+ /* Print cmd line configuration */
+ print_cmd_line_config();
+
+ /* PQoS Initialization - Check and initialize CAT capability */
+ cfg.fd_log = STDOUT_FILENO;
+ cfg.verbose = 0;
+ cfg.cdp_cfg = PQOS_REQUIRE_CDP_ANY;
+ ret = pqos_init(&cfg);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Error initializing PQoS library!\n");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ /* Get capability and CPU info pointer */
+ ret = pqos_cap_get(&m_cap, &m_cpu);
+ if (ret != PQOS_RETVAL_OK || m_cap == NULL || m_cpu == NULL) {
+ printf("PQOS: Error retrieving PQoS capabilities!\n");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ /* Get L3CA capabilities */
+ ret = pqos_cap_get_type(m_cap, PQOS_CAP_TYPE_L3CA, &m_cap_l3ca);
+ if (ret != PQOS_RETVAL_OK || m_cap_l3ca == NULL) {
+ printf("PQOS: Error retrieving PQOS_CAP_TYPE_L3CA "
+ "capabilities!\n");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ /* Get CPU socket information */
+ ret = pqos_cpu_get_sockets(m_cpu, PQOS_MAX_SOCKETS, &m_sock_count,
+ m_sockets);
+ if (ret != PQOS_RETVAL_OK) {
+ printf("PQOS: Error retrieving CPU socket information!\n");
+ ret = -EFAULT;
+ goto err;
+ }
+
+ /* Validate cmd line configuration */
+ ret = cat_validate();
+ if (ret != 0) {
+ printf("PQOS: Requested CAT configuration is not valid!\n");
+ goto err;
+ }
+
+ /* configure system */
+ ret = cat_set();
+ if (ret != 0) {
+ printf("PQOS: Failed to configure CAT!\n");
+ goto err;
+ }
+
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+
+ ret = atexit(cat_exit);
+ if (ret != 0) {
+ printf("PQOS: Cannot set exit function\n");
+ goto err;
+ }
+
+ /* Print CAT configuration */
+ print_cat_config();
+
+ return args_num;
+
+err:
+ /* deallocate all the resources */
+ cat_fini();
+ return ret;
+}
diff --git a/examples/l2fwd-cat/cat.h b/examples/l2fwd-cat/cat.h
new file mode 100644
index 00000000..aef2b768
--- /dev/null
+++ b/examples/l2fwd-cat/cat.h
@@ -0,0 +1,72 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CAT_H
+#define _CAT_H
+
+/**
+ * @file
+ * PQoS CAT
+ */
+
+#include <stdint.h>
+#include <string.h>
+
+#include <rte_lcore.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* L3 cache allocation class of service data structure */
+struct cat_config {
+ rte_cpuset_t cpumask; /* CPUs bitmask */
+ int cdp; /* data & code masks used if true */
+ union {
+ uint64_t mask; /* capacity bitmask (CBM) */
+ struct {
+ uint64_t data_mask; /* data capacity bitmask (CBM) */
+ uint64_t code_mask; /* code capacity bitmask (CBM) */
+ };
+ };
+};
+
+int cat_init(int argc, char **argv);
+
+void cat_exit(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _CAT_H */
diff --git a/examples/l2fwd-cat/l2fwd-cat.c b/examples/l2fwd-cat/l2fwd-cat.c
new file mode 100644
index 00000000..8cce33b8
--- /dev/null
+++ b/examples/l2fwd-cat/l2fwd-cat.c
@@ -0,0 +1,224 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#include "cat.h"
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN }
+};
+
+/* l2fwd-cat.c: CAT enabled, basic DPDK skeleton forwarding example. */
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ /* Configure the Ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Allocate and set up 1 RX queue per Ethernet port. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Allocate and set up 1 TX queue per Ethernet port. */
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the Ethernet port. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Display the port MAC address. */
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+ " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ /* Enable RX in promiscuous mode for the Ethernet device. */
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
+
+/*
+ * The lcore main. This is the main thread that does the work, reading from
+ * an input port and writing to an output port.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ uint8_t port;
+
+ /*
+ * Check that the port is on the same NUMA node as the polling thread
+ * for best performance.
+ */
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+
+ /* Run until the application is quit or killed. */
+ for (;;) {
+ /*
+ * Receive packets on a port and forward them on the paired
+ * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
+ */
+ for (port = 0; port < nb_ports; port++) {
+
+ /* Get burst of RX packets, from first port of pair. */
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+
+ if (unlikely(nb_rx == 0))
+ continue;
+
+ /* Send burst of TX packets, to second port of pair. */
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+
+ /* Free any unsent packets. */
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ unsigned nb_ports;
+ uint8_t portid;
+
+ /* Initialize the Environment Abstraction Layer (EAL). */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+ argc -= ret;
+ argv += ret;
+
+ /*
+ * Initialize the PQoS library and configure CAT.
+ * Please see l2fwd-cat documentation for more info.
+ */
+ ret = cat_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "PQOS: L3CA init failed!\n");
+
+ argc -= ret;
+ argv += ret;
+
+ /* Check that there is an even number of ports to send/receive on. */
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ /* Creates a new mempool in memory to hold the mbufs. */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* Initialize all ports. */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+ /* Call lcore_main on the master core only. */
+ lcore_main();
+
+ return 0;
+}
diff --git a/examples/l2fwd-crypto/Makefile b/examples/l2fwd-crypto/Makefile
new file mode 100644
index 00000000..e8224cae
--- /dev/null
+++ b/examples/l2fwd-crypto/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd-crypto
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-crypto/main.c b/examples/l2fwd-crypto/main.c
new file mode 100644
index 00000000..d4e2d8de
--- /dev/null
+++ b/examples/l2fwd-crypto/main.c
@@ -0,0 +1,2056 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <time.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_common.h>
+#include <rte_cryptodev.h>
+#include <rte_cycles.h>
+#include <rte_debug.h>
+#include <rte_eal.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_interrupts.h>
+#include <rte_ip.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_memzone.h>
+#include <rte_pci.h>
+#include <rte_per_lcore.h>
+#include <rte_prefetch.h>
+#include <rte_random.h>
+#include <rte_ring.h>
+#include <rte_hexdump.h>
+
+enum cdev_type {
+ CDEV_TYPE_ANY,
+ CDEV_TYPE_HW,
+ CDEV_TYPE_SW
+};
+
+#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
+
+#define NB_MBUF 8192
+
+#define MAX_STR_LEN 32
+#define MAX_KEY_SIZE 128
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint64_t l2fwd_enabled_port_mask;
+static uint64_t l2fwd_enabled_crypto_mask;
+
+/* list of enabled ports */
+static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
+
+
+struct pkt_buffer {
+ unsigned len;
+ struct rte_mbuf *buffer[MAX_PKT_BURST];
+};
+
+struct op_buffer {
+ unsigned len;
+ struct rte_crypto_op *buffer[MAX_PKT_BURST];
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+
+enum l2fwd_crypto_xform_chain {
+ L2FWD_CRYPTO_CIPHER_HASH,
+ L2FWD_CRYPTO_HASH_CIPHER,
+ L2FWD_CRYPTO_CIPHER_ONLY,
+ L2FWD_CRYPTO_HASH_ONLY
+};
+
+struct l2fwd_key {
+ uint8_t *data;
+ uint32_t length;
+ phys_addr_t phys_addr;
+};
+
+char supported_auth_algo[RTE_CRYPTO_AUTH_LIST_END][MAX_STR_LEN];
+char supported_cipher_algo[RTE_CRYPTO_CIPHER_LIST_END][MAX_STR_LEN];
+
+/** l2fwd crypto application command line options */
+struct l2fwd_crypto_options {
+ unsigned portmask;
+ unsigned nb_ports_per_lcore;
+ unsigned refresh_period;
+ unsigned single_lcore:1;
+
+ enum cdev_type type;
+ unsigned sessionless:1;
+
+ enum l2fwd_crypto_xform_chain xform_chain;
+
+ struct rte_crypto_sym_xform cipher_xform;
+ unsigned ckey_param;
+ int ckey_random_size;
+
+ struct l2fwd_key iv;
+ unsigned iv_param;
+ int iv_random_size;
+
+ struct rte_crypto_sym_xform auth_xform;
+ uint8_t akey_param;
+ int akey_random_size;
+
+ struct l2fwd_key aad;
+ unsigned aad_param;
+ int aad_random_size;
+
+ int digest_size;
+
+ uint16_t block_size;
+ char string_type[MAX_STR_LEN];
+};
+
+/** l2fwd crypto lcore params */
+struct l2fwd_crypto_params {
+ uint8_t dev_id;
+ uint8_t qp_id;
+
+ unsigned digest_length;
+ unsigned block_size;
+
+ struct l2fwd_key iv;
+ struct l2fwd_key aad;
+ struct rte_cryptodev_sym_session *session;
+
+ uint8_t do_cipher;
+ uint8_t do_hash;
+ uint8_t hash_verify;
+
+ enum rte_crypto_cipher_algorithm cipher_algo;
+ enum rte_crypto_auth_algorithm auth_algo;
+};
+
+/** lcore configuration */
+struct lcore_queue_conf {
+ unsigned nb_rx_ports;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+
+ unsigned nb_crypto_devs;
+ unsigned cryptodev_list[MAX_RX_QUEUE_PER_LCORE];
+
+ struct op_buffer op_buf[RTE_MAX_ETHPORTS];
+ struct pkt_buffer pkt_buf[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_NONE,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+struct rte_mempool *l2fwd_pktmbuf_pool;
+struct rte_mempool *l2fwd_crypto_op_pool;
+
+/* Per-port statistics struct */
+struct l2fwd_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+
+ uint64_t crypto_enqueued;
+ uint64_t crypto_dequeued;
+
+ uint64_t dropped;
+} __rte_cache_aligned;
+
+struct l2fwd_crypto_statistics {
+ uint64_t enqueued;
+ uint64_t dequeued;
+
+ uint64_t errors;
+} __rte_cache_aligned;
+
+struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
+struct l2fwd_crypto_statistics crypto_statistics[RTE_MAX_ETHPORTS];
+
+/* A tsc-based timer responsible for triggering statistics printout */
+#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 GHz */
+#define MAX_TIMER_PERIOD 86400UL /* 1 day max */
+
+/* default period is 10 seconds */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000;
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ uint64_t total_packets_enqueued, total_packets_dequeued,
+ total_packets_errors;
+ unsigned portid;
+ uint64_t cdevid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+ total_packets_enqueued = 0;
+ total_packets_dequeued = 0;
+ total_packets_errors = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %32"PRIu64
+ "\nPackets received: %28"PRIu64
+ "\nPackets dropped: %29"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+ printf("\nCrypto statistics ==================================");
+
+ for (cdevid = 0; cdevid < RTE_CRYPTO_MAX_DEVS; cdevid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_crypto_mask & (1lu << cdevid)) == 0)
+ continue;
+ printf("\nStatistics for cryptodev %"PRIu64
+ " -------------------------"
+ "\nPackets enqueued: %28"PRIu64
+ "\nPackets dequeued: %28"PRIu64
+ "\nPackets errors: %30"PRIu64,
+ cdevid,
+ crypto_statistics[cdevid].enqueued,
+ crypto_statistics[cdevid].dequeued,
+ crypto_statistics[cdevid].errors);
+
+ total_packets_enqueued += crypto_statistics[cdevid].enqueued;
+ total_packets_dequeued += crypto_statistics[cdevid].dequeued;
+ total_packets_errors += crypto_statistics[cdevid].errors;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets received: %22"PRIu64
+ "\nTotal packets enqueued: %22"PRIu64
+ "\nTotal packets dequeued: %22"PRIu64
+ "\nTotal packets sent: %26"PRIu64
+ "\nTotal packets dropped: %23"PRIu64
+ "\nTotal packets crypto errors: %17"PRIu64,
+ total_packets_rx,
+ total_packets_enqueued,
+ total_packets_dequeued,
+ total_packets_tx,
+ total_packets_dropped,
+ total_packets_errors);
+ printf("\n====================================================\n");
+}
+
+static void
+fill_supported_algorithm_tables(void)
+{
+ unsigned i;
+
+ for (i = 0; i < RTE_CRYPTO_AUTH_LIST_END; i++)
+ strcpy(supported_auth_algo[i], "NOT_SUPPORTED");
+
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_AES_GCM], "AES_GCM");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_MD5_HMAC], "MD5_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_NULL], "NULL");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA1_HMAC], "SHA1_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA224_HMAC], "SHA224_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA256_HMAC], "SHA256_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA384_HMAC], "SHA384_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SHA512_HMAC], "SHA512_HMAC");
+ strcpy(supported_auth_algo[RTE_CRYPTO_AUTH_SNOW3G_UIA2], "SNOW3G_UIA2");
+
+ for (i = 0; i < RTE_CRYPTO_CIPHER_LIST_END; i++)
+ strcpy(supported_cipher_algo[i], "NOT_SUPPORTED");
+
+ strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_AES_CBC], "AES_CBC");
+ strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_AES_GCM], "AES_GCM");
+ strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_NULL], "NULL");
+ strcpy(supported_cipher_algo[RTE_CRYPTO_CIPHER_SNOW3G_UEA2], "SNOW3G_UEA2");
+}
+
+
+static int
+l2fwd_crypto_send_burst(struct lcore_queue_conf *qconf, unsigned n,
+ struct l2fwd_crypto_params *cparams)
+{
+ struct rte_crypto_op **op_buffer;
+ unsigned ret;
+
+ op_buffer = (struct rte_crypto_op **)
+ qconf->op_buf[cparams->dev_id].buffer;
+
+ ret = rte_cryptodev_enqueue_burst(cparams->dev_id,
+ cparams->qp_id, op_buffer, (uint16_t) n);
+
+ crypto_statistics[cparams->dev_id].enqueued += ret;
+ if (unlikely(ret < n)) {
+ crypto_statistics[cparams->dev_id].errors += (n - ret);
+ do {
+ rte_pktmbuf_free(op_buffer[ret]->sym->m_src);
+ rte_crypto_op_free(op_buffer[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+static int
+l2fwd_crypto_enqueue(struct rte_crypto_op *op,
+ struct l2fwd_crypto_params *cparams)
+{
+ unsigned lcore_id, len;
+ struct lcore_queue_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_queue_conf[lcore_id];
+ len = qconf->op_buf[cparams->dev_id].len;
+ qconf->op_buf[cparams->dev_id].buffer[len] = op;
+ len++;
+
+ /* enough ops to be sent */
+ if (len == MAX_PKT_BURST) {
+ l2fwd_crypto_send_burst(qconf, MAX_PKT_BURST, cparams);
+ len = 0;
+ }
+
+ qconf->op_buf[cparams->dev_id].len = len;
+ return 0;
+}
+
+static int
+l2fwd_simple_crypto_enqueue(struct rte_mbuf *m,
+ struct rte_crypto_op *op,
+ struct l2fwd_crypto_params *cparams)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ip_hdr;
+
+ unsigned ipdata_offset, pad_len, data_len;
+ char *padding;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ if (eth_hdr->ether_type != rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+ return -1;
+
+ ipdata_offset = sizeof(struct ether_hdr);
+
+ ip_hdr = (struct ipv4_hdr *)(rte_pktmbuf_mtod(m, char *) +
+ ipdata_offset);
+
+ ipdata_offset += (ip_hdr->version_ihl & IPV4_HDR_IHL_MASK)
+ * IPV4_IHL_MULTIPLIER;
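+	/* The IHL field counts 32-bit words, so the multiplication above
+	 * converts it to the IP header length in bytes. */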
+
+
+	/* Zero-pad the data to be ciphered/hashed so it is block aligned */
+ data_len = rte_pktmbuf_data_len(m) - ipdata_offset;
+ pad_len = data_len % cparams->block_size ? cparams->block_size -
+ (data_len % cparams->block_size) : 0;
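+	/* e.g. 60 bytes of data with a 16-byte block size needs 4 bytes of
+	 * padding (60 % 16 = 12, 16 - 12 = 4) */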
+
+ if (pad_len) {
+ padding = rte_pktmbuf_append(m, pad_len);
+ if (unlikely(!padding))
+ return -1;
+
+ data_len += pad_len;
+ memset(padding, 0, pad_len);
+ }
+
+ /* Set crypto operation data parameters */
+ rte_crypto_op_attach_sym_session(op, cparams->session);
+
+ if (cparams->do_hash) {
+ if (!cparams->hash_verify) {
+ /* Append space for digest to end of packet */
+ op->sym->auth.digest.data = (uint8_t *)rte_pktmbuf_append(m,
+ cparams->digest_length);
+ } else {
+ op->sym->auth.digest.data = (uint8_t *)rte_pktmbuf_append(m,
+ cparams->digest_length);
+ }
+
+ op->sym->auth.digest.phys_addr = rte_pktmbuf_mtophys_offset(m,
+ rte_pktmbuf_pkt_len(m) - cparams->digest_length);
+ op->sym->auth.digest.length = cparams->digest_length;
+
+ /* For SNOW3G algorithms, offset/length must be in bits */
+ if (cparams->auth_algo == RTE_CRYPTO_AUTH_SNOW3G_UIA2) {
+ op->sym->auth.data.offset = ipdata_offset << 3;
+ op->sym->auth.data.length = data_len << 3;
+ } else {
+ op->sym->auth.data.offset = ipdata_offset;
+ op->sym->auth.data.length = data_len;
+ }
+
+ if (cparams->aad.length) {
+ op->sym->auth.aad.data = cparams->aad.data;
+ op->sym->auth.aad.phys_addr = cparams->aad.phys_addr;
+ op->sym->auth.aad.length = cparams->aad.length;
+ }
+ }
+
+ if (cparams->do_cipher) {
+ op->sym->cipher.iv.data = cparams->iv.data;
+ op->sym->cipher.iv.phys_addr = cparams->iv.phys_addr;
+ op->sym->cipher.iv.length = cparams->iv.length;
+
+ /* For SNOW3G algorithms, offset/length must be in bits */
+ if (cparams->cipher_algo == RTE_CRYPTO_CIPHER_SNOW3G_UEA2) {
+ op->sym->cipher.data.offset = ipdata_offset << 3;
+ if (cparams->do_hash && cparams->hash_verify)
+ /* Do not cipher the hash tag */
+ op->sym->cipher.data.length = (data_len -
+ cparams->digest_length) << 3;
+ else
+ op->sym->cipher.data.length = data_len << 3;
+
+ } else {
+ op->sym->cipher.data.offset = ipdata_offset;
+ if (cparams->do_hash && cparams->hash_verify)
+ /* Do not cipher the hash tag */
+ op->sym->cipher.data.length = data_len -
+ cparams->digest_length;
+ else
+ op->sym->cipher.data.length = data_len;
+ }
+ }
+
+ op->sym->m_src = m;
+
+ return l2fwd_crypto_enqueue(op, cparams);
+}
+
+
+/* Send the burst of packets on an output interface */
+static int
+l2fwd_send_burst(struct lcore_queue_conf *qconf, unsigned n,
+ uint8_t port)
+{
+ struct rte_mbuf **pkt_buffer;
+ unsigned ret;
+
+ pkt_buffer = (struct rte_mbuf **)qconf->pkt_buf[port].buffer;
+
+ ret = rte_eth_tx_burst(port, 0, pkt_buffer, (uint16_t)n);
+ port_statistics[port].tx += ret;
+ if (unlikely(ret < n)) {
+ port_statistics[port].dropped += (n - ret);
+ do {
+ rte_pktmbuf_free(pkt_buffer[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue packets for TX and prepare them to be sent */
+static int
+l2fwd_send_packet(struct rte_mbuf *m, uint8_t port)
+{
+ unsigned lcore_id, len;
+ struct lcore_queue_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_queue_conf[lcore_id];
+ len = qconf->pkt_buf[port].len;
+ qconf->pkt_buf[port].buffer[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ l2fwd_send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->pkt_buf[port].len = len;
+ return 0;
+}
+
+static void
+l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ unsigned dst_port;
+
+ dst_port = l2fwd_dst_ports[portid];
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
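+	/* On a little-endian CPU this 64-bit store writes the locally
+	 * administered MAC 02:00:00:00:00:<dst_port> into d_addr; the two
+	 * bytes that spill into s_addr are overwritten by the copy below. */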
+
+ /* src addr */
+ ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
+
+ l2fwd_send_packet(m, (uint8_t) dst_port);
+}
+
+/** Generate random key */
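+/* Note: rand() here is only a convenient source of example key/IV bytes;
+ * it is not a cryptographically secure generator.
+ */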
+static void
+generate_random_key(uint8_t *key, unsigned length)
+{
+ unsigned i;
+
+ for (i = 0; i < length; i++)
+ key[i] = rand() % 0xff;
+}
+
+static struct rte_cryptodev_sym_session *
+initialize_crypto_session(struct l2fwd_crypto_options *options,
+ uint8_t cdev_id)
+{
+ struct rte_crypto_sym_xform *first_xform;
+
+ if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH) {
+ first_xform = &options->cipher_xform;
+ first_xform->next = &options->auth_xform;
+ } else if (options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER) {
+ first_xform = &options->auth_xform;
+ first_xform->next = &options->cipher_xform;
+ } else if (options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) {
+ first_xform = &options->cipher_xform;
+ } else {
+ first_xform = &options->auth_xform;
+ }
+
+	/* Create the crypto session with the transform chain set up above */
+ return rte_cryptodev_sym_session_create(cdev_id, first_xform);
+}
+
+static void
+l2fwd_crypto_options_print(struct l2fwd_crypto_options *options);
+
+/* main processing loop */
+static void
+l2fwd_main_loop(struct l2fwd_crypto_options *options)
+{
+ struct rte_mbuf *m, *pkts_burst[MAX_PKT_BURST];
+ struct rte_crypto_op *ops_burst[MAX_PKT_BURST];
+
+ unsigned lcore_id = rte_lcore_id();
+ uint64_t prev_tsc = 0, diff_tsc, cur_tsc, timer_tsc = 0;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+ US_PER_S * BURST_TX_DRAIN_US;
+ struct l2fwd_crypto_params *cparams;
+ struct l2fwd_crypto_params port_cparams[qconf->nb_crypto_devs];
+
+ if (qconf->nb_rx_ports == 0) {
+ RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->nb_rx_ports; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
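+	/*
+	 * Build the per-device crypto parameters from the parsed options:
+	 * enable cipher/hash as requested by the chain, copy the digest,
+	 * AAD and IV settings (generating random AAD/IV when none was
+	 * supplied on the command line) and create one session per device.
+	 */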
+ for (i = 0; i < qconf->nb_crypto_devs; i++) {
+ port_cparams[i].do_cipher = 0;
+ port_cparams[i].do_hash = 0;
+
+ switch (options->xform_chain) {
+ case L2FWD_CRYPTO_CIPHER_HASH:
+ case L2FWD_CRYPTO_HASH_CIPHER:
+ port_cparams[i].do_cipher = 1;
+ port_cparams[i].do_hash = 1;
+ break;
+ case L2FWD_CRYPTO_HASH_ONLY:
+ port_cparams[i].do_hash = 1;
+ break;
+ case L2FWD_CRYPTO_CIPHER_ONLY:
+ port_cparams[i].do_cipher = 1;
+ break;
+ }
+
+ port_cparams[i].dev_id = qconf->cryptodev_list[i];
+ port_cparams[i].qp_id = 0;
+
+ port_cparams[i].block_size = options->block_size;
+
+ if (port_cparams[i].do_hash) {
+ port_cparams[i].digest_length =
+ options->auth_xform.auth.digest_length;
+ if (options->auth_xform.auth.add_auth_data_length) {
+ port_cparams[i].aad.data = options->aad.data;
+ port_cparams[i].aad.length =
+ options->auth_xform.auth.add_auth_data_length;
+ port_cparams[i].aad.phys_addr = options->aad.phys_addr;
+ if (!options->aad_param)
+ generate_random_key(port_cparams[i].aad.data,
+ port_cparams[i].aad.length);
+
+ }
+
+ if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_VERIFY)
+ port_cparams[i].hash_verify = 1;
+ else
+ port_cparams[i].hash_verify = 0;
+
+ port_cparams[i].auth_algo = options->auth_xform.auth.algo;
+ }
+
+ if (port_cparams[i].do_cipher) {
+ port_cparams[i].iv.data = options->iv.data;
+ port_cparams[i].iv.length = options->iv.length;
+ port_cparams[i].iv.phys_addr = options->iv.phys_addr;
+ if (!options->iv_param)
+ generate_random_key(port_cparams[i].iv.data,
+ port_cparams[i].iv.length);
+
+ port_cparams[i].cipher_algo = options->cipher_xform.cipher.algo;
+ }
+
+ port_cparams[i].session = initialize_crypto_session(options,
+ port_cparams[i].dev_id);
+
+ if (port_cparams[i].session == NULL)
+ return;
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u cryptoid=%u\n", lcore_id,
+ port_cparams[i].dev_id);
+ }
+
+ l2fwd_crypto_options_print(options);
+
+ /*
+	 * Initialize the previous TSC timestamp before the loop so the
+	 * port statistics are not printed immediately and the user can
+	 * first see the crypto configuration.
+ */
+ prev_tsc = rte_rdtsc();
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (qconf->pkt_buf[portid].len == 0)
+ continue;
+ l2fwd_send_burst(&lcore_queue_conf[lcore_id],
+ qconf->pkt_buf[portid].len,
+ (uint8_t) portid);
+ qconf->pkt_buf[portid].len = 0;
+ }
+
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >=
+ (uint64_t)timer_period)) {
+
+ /* do this only on master core */
+ if (lcore_id == rte_get_master_lcore()
+ && options->refresh_period) {
+ print_stats();
+ timer_tsc = 0;
+ }
+ }
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->nb_rx_ports; i++) {
+ portid = qconf->rx_port_list[i];
+
+ cparams = &port_cparams[i];
+
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ port_statistics[portid].rx += nb_rx;
+
+ if (nb_rx) {
+ /*
+				 * If we cannot allocate crypto ops for the
+				 * whole burst, drop all received packets and
+				 * fall through to the dequeue stage so
+				 * in-flight operations are still processed.
+ */
+ if (rte_crypto_op_bulk_alloc(
+ l2fwd_crypto_op_pool,
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC,
+ ops_burst, nb_rx) !=
+ nb_rx) {
+ for (j = 0; j < nb_rx; j++)
+						rte_pktmbuf_free(pkts_burst[j]);
+
+ nb_rx = 0;
+ }
+
+				/* Enqueue packets to the crypto device */
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+
+ l2fwd_simple_crypto_enqueue(m,
+ ops_burst[j], cparams);
+ }
+ }
+
+ /* Dequeue packets from Crypto device */
+ do {
+ nb_rx = rte_cryptodev_dequeue_burst(
+ cparams->dev_id, cparams->qp_id,
+ ops_burst, MAX_PKT_BURST);
+
+ crypto_statistics[cparams->dev_id].dequeued +=
+ nb_rx;
+
+ /* Forward crypto'd packets */
+ for (j = 0; j < nb_rx; j++) {
+ m = ops_burst[j]->sym->m_src;
+
+ rte_crypto_op_free(ops_burst[j]);
+ l2fwd_simple_forward(m, portid);
+ }
+ } while (nb_rx == MAX_PKT_BURST);
+ }
+ }
+}
+
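+/* Per-lcore entry point: run the main forwarding loop with the shared options */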
+static int
+l2fwd_launch_one_lcore(void *arg)
+{
+ l2fwd_main_loop((struct l2fwd_crypto_options *)arg);
+ return 0;
+}
+
+/* Display command line arguments usage */
+static void
+l2fwd_crypto_usage(const char *prgname)
+{
+ printf("%s [EAL options] --\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -s manage all ports from single lcore\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds"
+ " (0 to disable, 10 default, 86400 maximum)\n"
+
+ " --cdev_type HW / SW / ANY\n"
+ " --chain HASH_CIPHER / CIPHER_HASH\n"
+
+ " --cipher_algo ALGO\n"
+ " --cipher_op ENCRYPT / DECRYPT\n"
+ " --cipher_key KEY (bytes separated with \":\")\n"
+ " --cipher_key_random_size SIZE: size of cipher key when generated randomly\n"
+ " --iv IV (bytes separated with \":\")\n"
+ " --iv_random_size SIZE: size of IV when generated randomly\n"
+
+ " --auth_algo ALGO\n"
+ " --auth_op GENERATE / VERIFY\n"
+ " --auth_key KEY (bytes separated with \":\")\n"
+ " --auth_key_random_size SIZE: size of auth key when generated randomly\n"
+ " --aad AAD (bytes separated with \":\")\n"
+ " --aad_random_size SIZE: size of AAD when generated randomly\n"
+ " --digest_size SIZE: size of digest to be generated/verified\n"
+
+ " --sessionless\n",
+ prgname);
+}
+
+/** Parse crypto device type command line argument */
+static int
+parse_cryptodev_type(enum cdev_type *type, char *optarg)
+{
+ if (strcmp("HW", optarg) == 0) {
+ *type = CDEV_TYPE_HW;
+ return 0;
+ } else if (strcmp("SW", optarg) == 0) {
+ *type = CDEV_TYPE_SW;
+ return 0;
+ } else if (strcmp("ANY", optarg) == 0) {
+ *type = CDEV_TYPE_ANY;
+ return 0;
+ }
+
+ return -1;
+}
+
+/** Parse crypto chain xform command line argument */
+static int
+parse_crypto_opt_chain(struct l2fwd_crypto_options *options, char *optarg)
+{
+ if (strcmp("CIPHER_HASH", optarg) == 0) {
+ options->xform_chain = L2FWD_CRYPTO_CIPHER_HASH;
+ return 0;
+ } else if (strcmp("HASH_CIPHER", optarg) == 0) {
+ options->xform_chain = L2FWD_CRYPTO_HASH_CIPHER;
+ return 0;
+ } else if (strcmp("CIPHER_ONLY", optarg) == 0) {
+ options->xform_chain = L2FWD_CRYPTO_CIPHER_ONLY;
+ return 0;
+ } else if (strcmp("HASH_ONLY", optarg) == 0) {
+ options->xform_chain = L2FWD_CRYPTO_HASH_ONLY;
+ return 0;
+ }
+
+ return -1;
+}
+
+/** Parse crypto cipher algo option command line argument */
+static int
+parse_cipher_algo(enum rte_crypto_cipher_algorithm *algo, char *optarg)
+{
+ unsigned i;
+
+ for (i = 0; i < RTE_CRYPTO_CIPHER_LIST_END; i++) {
+ if (!strcmp(supported_cipher_algo[i], optarg)) {
+ *algo = (enum rte_crypto_cipher_algorithm)i;
+ return 0;
+ }
+ }
+
+ printf("Cipher algorithm not supported!\n");
+ return -1;
+}
+
+/** Parse crypto cipher operation command line argument */
+static int
+parse_cipher_op(enum rte_crypto_cipher_operation *op, char *optarg)
+{
+ if (strcmp("ENCRYPT", optarg) == 0) {
+ *op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+ return 0;
+ } else if (strcmp("DECRYPT", optarg) == 0) {
+ *op = RTE_CRYPTO_CIPHER_OP_DECRYPT;
+ return 0;
+ }
+
+ printf("Cipher operation not supported!\n");
+ return -1;
+}
+
+/** Parse crypto key command line argument */
+static int
+parse_key(uint8_t *data, char *input_arg)
+{
+ unsigned byte_count;
+ char *token;
+
+ for (byte_count = 0, token = strtok(input_arg, ":");
+ (byte_count < MAX_KEY_SIZE) && (token != NULL);
+ token = strtok(NULL, ":")) {
+
+		/* clear errno so a stale value does not cause a false error */
+		errno = 0;
+		int number = (int)strtol(token, NULL, 16);
+
+		if (errno == EINVAL || errno == ERANGE || number > 0xFF)
+ return -1;
+
+ data[byte_count++] = (uint8_t)number;
+ }
+
+ return byte_count;
+}
+
+/** Parse size parameter */
+static int
+parse_size(int *size, const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ n = 0;
+
+ if (n == 0) {
+ printf("invalid size\n");
+ return -1;
+ }
+
+ *size = n;
+ return 0;
+}
+
+/** Parse crypto authentication algorithm command line argument */
+static int
+parse_auth_algo(enum rte_crypto_auth_algorithm *algo, char *optarg)
+{
+ unsigned i;
+
+ for (i = 0; i < RTE_CRYPTO_AUTH_LIST_END; i++) {
+ if (!strcmp(supported_auth_algo[i], optarg)) {
+ *algo = (enum rte_crypto_auth_algorithm)i;
+ return 0;
+ }
+ }
+
+ printf("Authentication algorithm specified not supported!\n");
+ return -1;
+}
+
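+/** Parse crypto authentication operation command line argument */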
+static int
+parse_auth_op(enum rte_crypto_auth_operation *op, char *optarg)
+{
+ if (strcmp("VERIFY", optarg) == 0) {
+ *op = RTE_CRYPTO_AUTH_OP_VERIFY;
+ return 0;
+ } else if (strcmp("GENERATE", optarg) == 0) {
+ *op = RTE_CRYPTO_AUTH_OP_GENERATE;
+ return 0;
+ }
+
+ printf("Authentication operation specified not supported!\n");
+ return -1;
+}
+
+/** Parse long options */
+static int
+l2fwd_crypto_parse_args_long_options(struct l2fwd_crypto_options *options,
+ struct option *lgopts, int option_index)
+{
+ int retval;
+
+ if (strcmp(lgopts[option_index].name, "cdev_type") == 0) {
+ retval = parse_cryptodev_type(&options->type, optarg);
+ if (retval == 0)
+ snprintf(options->string_type, MAX_STR_LEN,
+ "%s", optarg);
+ return retval;
+ }
+
+ else if (strcmp(lgopts[option_index].name, "chain") == 0)
+ return parse_crypto_opt_chain(options, optarg);
+
+ /* Cipher options */
+ else if (strcmp(lgopts[option_index].name, "cipher_algo") == 0)
+ return parse_cipher_algo(&options->cipher_xform.cipher.algo,
+ optarg);
+
+ else if (strcmp(lgopts[option_index].name, "cipher_op") == 0)
+ return parse_cipher_op(&options->cipher_xform.cipher.op,
+ optarg);
+
+ else if (strcmp(lgopts[option_index].name, "cipher_key") == 0) {
+ options->ckey_param = 1;
+ options->cipher_xform.cipher.key.length =
+ parse_key(options->cipher_xform.cipher.key.data, optarg);
+ if (options->cipher_xform.cipher.key.length > 0)
+ return 0;
+ else
+ return -1;
+ }
+
+ else if (strcmp(lgopts[option_index].name, "cipher_key_random_size") == 0)
+ return parse_size(&options->ckey_random_size, optarg);
+
+ else if (strcmp(lgopts[option_index].name, "iv") == 0) {
+ options->iv_param = 1;
+ options->iv.length =
+ parse_key(options->iv.data, optarg);
+ if (options->iv.length > 0)
+ return 0;
+ else
+ return -1;
+ }
+
+ else if (strcmp(lgopts[option_index].name, "iv_random_size") == 0)
+ return parse_size(&options->iv_random_size, optarg);
+
+ /* Authentication options */
+ else if (strcmp(lgopts[option_index].name, "auth_algo") == 0) {
+ return parse_auth_algo(&options->auth_xform.auth.algo,
+ optarg);
+ }
+
+ else if (strcmp(lgopts[option_index].name, "auth_op") == 0)
+ return parse_auth_op(&options->auth_xform.auth.op,
+ optarg);
+
+ else if (strcmp(lgopts[option_index].name, "auth_key") == 0) {
+ options->akey_param = 1;
+ options->auth_xform.auth.key.length =
+ parse_key(options->auth_xform.auth.key.data, optarg);
+ if (options->auth_xform.auth.key.length > 0)
+ return 0;
+ else
+ return -1;
+ }
+
+ else if (strcmp(lgopts[option_index].name, "auth_key_random_size") == 0) {
+ return parse_size(&options->akey_random_size, optarg);
+ }
+
+ else if (strcmp(lgopts[option_index].name, "aad") == 0) {
+ options->aad_param = 1;
+ options->aad.length =
+ parse_key(options->aad.data, optarg);
+ if (options->aad.length > 0)
+ return 0;
+ else
+ return -1;
+ }
+
+ else if (strcmp(lgopts[option_index].name, "aad_random_size") == 0) {
+ return parse_size(&options->aad_random_size, optarg);
+ }
+
+ else if (strcmp(lgopts[option_index].name, "digest_size") == 0) {
+ return parse_size(&options->digest_size, optarg);
+ }
+
+ else if (strcmp(lgopts[option_index].name, "sessionless") == 0) {
+ options->sessionless = 1;
+ return 0;
+ }
+
+ return -1;
+}
+
+/** Parse port mask */
+static int
+l2fwd_crypto_parse_portmask(struct l2fwd_crypto_options *options,
+ const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(q_arg, &end, 16);
+	if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ pm = 0;
+
+ options->portmask = pm;
+ if (options->portmask == 0) {
+ printf("invalid portmask specified\n");
+ return -1;
+ }
+
+ return pm;
+}
+
+/** Parse number of queues */
+static int
+l2fwd_crypto_parse_nqueue(struct l2fwd_crypto_options *options,
+ const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ n = 0;
+ else if (n >= MAX_RX_QUEUE_PER_LCORE)
+ n = 0;
+
+ options->nb_ports_per_lcore = n;
+ if (options->nb_ports_per_lcore == 0) {
+ printf("invalid number of ports selected\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/** Parse timer period */
+static int
+l2fwd_crypto_parse_timer_period(struct l2fwd_crypto_options *options,
+ const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse number string */
+ n = (unsigned)strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ n = 0;
+
+ if (n >= MAX_TIMER_PERIOD) {
+ printf("Warning refresh period specified %lu is greater than "
+ "max value %lu! using max value",
+ n, MAX_TIMER_PERIOD);
+ n = MAX_TIMER_PERIOD;
+ }
+
+ options->refresh_period = n * 1000 * TIMER_MILLISECOND;
+
+ return 0;
+}
+
+/** Generate default options for application */
+static void
+l2fwd_crypto_default_options(struct l2fwd_crypto_options *options)
+{
+ srand(time(NULL));
+
+ options->portmask = 0xffffffff;
+ options->nb_ports_per_lcore = 1;
+ options->refresh_period = 10000;
+ options->single_lcore = 0;
+ options->sessionless = 0;
+
+ options->xform_chain = L2FWD_CRYPTO_CIPHER_HASH;
+
+ /* Cipher Data */
+ options->cipher_xform.type = RTE_CRYPTO_SYM_XFORM_CIPHER;
+ options->cipher_xform.next = NULL;
+ options->ckey_param = 0;
+ options->ckey_random_size = -1;
+ options->cipher_xform.cipher.key.length = 0;
+ options->iv_param = 0;
+ options->iv_random_size = -1;
+ options->iv.length = 0;
+
+ options->cipher_xform.cipher.algo = RTE_CRYPTO_CIPHER_AES_CBC;
+ options->cipher_xform.cipher.op = RTE_CRYPTO_CIPHER_OP_ENCRYPT;
+
+ /* Authentication Data */
+ options->auth_xform.type = RTE_CRYPTO_SYM_XFORM_AUTH;
+ options->auth_xform.next = NULL;
+ options->akey_param = 0;
+ options->akey_random_size = -1;
+ options->auth_xform.auth.key.length = 0;
+ options->aad_param = 0;
+ options->aad_random_size = -1;
+ options->aad.length = 0;
+ options->digest_size = -1;
+
+ options->auth_xform.auth.algo = RTE_CRYPTO_AUTH_SHA1_HMAC;
+ options->auth_xform.auth.op = RTE_CRYPTO_AUTH_OP_GENERATE;
+
+ options->type = CDEV_TYPE_ANY;
+}
+
+static void
+display_cipher_info(struct l2fwd_crypto_options *options)
+{
+ printf("\n---- Cipher information ---\n");
+ printf("Algorithm: %s\n",
+ supported_cipher_algo[options->cipher_xform.cipher.algo]);
+ rte_hexdump(stdout, "Cipher key:",
+ options->cipher_xform.cipher.key.data,
+ options->cipher_xform.cipher.key.length);
+ rte_hexdump(stdout, "IV:", options->iv.data, options->iv.length);
+}
+
+static void
+display_auth_info(struct l2fwd_crypto_options *options)
+{
+ printf("\n---- Authentication information ---\n");
+ printf("Algorithm: %s\n",
+ supported_auth_algo[options->auth_xform.auth.algo]);
+ rte_hexdump(stdout, "Auth key:",
+ options->auth_xform.auth.key.data,
+ options->auth_xform.auth.key.length);
+ rte_hexdump(stdout, "AAD:", options->aad.data, options->aad.length);
+}
+
+static void
+l2fwd_crypto_options_print(struct l2fwd_crypto_options *options)
+{
+ char string_cipher_op[MAX_STR_LEN];
+ char string_auth_op[MAX_STR_LEN];
+
+ if (options->cipher_xform.cipher.op == RTE_CRYPTO_CIPHER_OP_ENCRYPT)
+ strcpy(string_cipher_op, "Encrypt");
+ else
+ strcpy(string_cipher_op, "Decrypt");
+
+ if (options->auth_xform.auth.op == RTE_CRYPTO_AUTH_OP_GENERATE)
+ strcpy(string_auth_op, "Auth generate");
+ else
+ strcpy(string_auth_op, "Auth verify");
+
+ printf("Options:-\nn");
+ printf("portmask: %x\n", options->portmask);
+ printf("ports per lcore: %u\n", options->nb_ports_per_lcore);
+ printf("refresh period : %u\n", options->refresh_period);
+ printf("single lcore mode: %s\n",
+ options->single_lcore ? "enabled" : "disabled");
+ printf("stats_printing: %s\n",
+ options->refresh_period == 0 ? "disabled" : "enabled");
+
+ printf("sessionless crypto: %s\n",
+ options->sessionless ? "enabled" : "disabled");
+
+ if (options->ckey_param && (options->ckey_random_size != -1))
+ printf("Cipher key already parsed, ignoring size of random key\n");
+
+ if (options->akey_param && (options->akey_random_size != -1))
+ printf("Auth key already parsed, ignoring size of random key\n");
+
+ if (options->iv_param && (options->iv_random_size != -1))
+ printf("IV already parsed, ignoring size of random IV\n");
+
+ if (options->aad_param && (options->aad_random_size != -1))
+ printf("AAD already parsed, ignoring size of random AAD\n");
+
+ printf("\nCrypto chain: ");
+ switch (options->xform_chain) {
+ case L2FWD_CRYPTO_CIPHER_HASH:
+ printf("Input --> %s --> %s --> Output\n",
+ string_cipher_op, string_auth_op);
+ display_cipher_info(options);
+ display_auth_info(options);
+ break;
+ case L2FWD_CRYPTO_HASH_CIPHER:
+ printf("Input --> %s --> %s --> Output\n",
+ string_auth_op, string_cipher_op);
+ display_cipher_info(options);
+ display_auth_info(options);
+ break;
+ case L2FWD_CRYPTO_HASH_ONLY:
+ printf("Input --> %s --> Output\n", string_auth_op);
+ display_auth_info(options);
+ break;
+ case L2FWD_CRYPTO_CIPHER_ONLY:
+ printf("Input --> %s --> Output\n", string_cipher_op);
+ display_cipher_info(options);
+ break;
+ }
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_crypto_parse_args(struct l2fwd_crypto_options *options,
+ int argc, char **argv)
+{
+ int opt, retval, option_index;
+ char **argvopt = argv, *prgname = argv[0];
+
+ static struct option lgopts[] = {
+ { "sessionless", no_argument, 0, 0 },
+
+ { "cdev_type", required_argument, 0, 0 },
+ { "chain", required_argument, 0, 0 },
+
+ { "cipher_algo", required_argument, 0, 0 },
+ { "cipher_op", required_argument, 0, 0 },
+ { "cipher_key", required_argument, 0, 0 },
+ { "cipher_key_random_size", required_argument, 0, 0 },
+
+ { "auth_algo", required_argument, 0, 0 },
+ { "auth_op", required_argument, 0, 0 },
+ { "auth_key", required_argument, 0, 0 },
+ { "auth_key_random_size", required_argument, 0, 0 },
+
+ { "iv", required_argument, 0, 0 },
+ { "iv_random_size", required_argument, 0, 0 },
+ { "aad", required_argument, 0, 0 },
+ { "aad_random_size", required_argument, 0, 0 },
+ { "digest_size", required_argument, 0, 0 },
+
+ { NULL, 0, 0, 0 }
+ };
+
+ l2fwd_crypto_default_options(options);
+
+	while ((opt = getopt_long(argc, argvopt, "p:q:sT:", lgopts,
+ &option_index)) != EOF) {
+ switch (opt) {
+ /* long options */
+ case 0:
+ retval = l2fwd_crypto_parse_args_long_options(options,
+ lgopts, option_index);
+ if (retval < 0) {
+ l2fwd_crypto_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* portmask */
+ case 'p':
+ retval = l2fwd_crypto_parse_portmask(options, optarg);
+ if (retval < 0) {
+ l2fwd_crypto_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ retval = l2fwd_crypto_parse_nqueue(options, optarg);
+ if (retval < 0) {
+ l2fwd_crypto_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* single */
+ case 's':
+ options->single_lcore = 1;
+
+ break;
+
+ /* timer period */
+ case 'T':
+ retval = l2fwd_crypto_parse_timer_period(options,
+ optarg);
+ if (retval < 0) {
+ l2fwd_crypto_usage(prgname);
+ return -1;
+ }
+ break;
+
+ default:
+ l2fwd_crypto_usage(prgname);
+ return -1;
+ }
+ }
+
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ retval = optind-1;
+ optind = 0; /* reset getopt lib */
+
+ return retval;
+}
+
+/* Check the link status of all ports, waiting up to 9s, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/* Check whether the crypto device matches the requested type (HW, SW or any) */
+static int
+check_type(struct l2fwd_crypto_options *options, struct rte_cryptodev_info *dev_info)
+{
+ if (options->type == CDEV_TYPE_HW &&
+ (dev_info->feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED))
+ return 0;
+ if (options->type == CDEV_TYPE_SW &&
+ !(dev_info->feature_flags & RTE_CRYPTODEV_FF_HW_ACCELERATED))
+ return 0;
+ if (options->type == CDEV_TYPE_ANY)
+ return 0;
+
+ return -1;
+}
+
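+/*
+ * Check whether a length matches one of the sizes advertised by a device
+ * capability: it must lie in [min, max] and be reachable from min in
+ * steps of increment (or equal min when no increment is given).
+ */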
+static inline int
+check_supported_size(uint16_t length, uint16_t min, uint16_t max,
+ uint16_t increment)
+{
+	uint16_t supp_size;
+
+	/* Single possible size when the capability advertises no increment */
+	if (increment == 0)
+		return (length == min) ? 0 : -1;
+
+	for (supp_size = min; supp_size <= max; supp_size += increment) {
+ if (length == supp_size)
+ return 0;
+ }
+
+ return -1;
+}
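+
+/*
+ * Configure up to nb_ports crypto devices: pick devices that support the
+ * requested cipher/auth algorithms and device type, validate (or derive)
+ * key, IV, AAD and digest sizes against the device capabilities, then set
+ * up one queue pair per enabled device.
+ */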
+static int
+initialize_cryptodevs(struct l2fwd_crypto_options *options, unsigned nb_ports,
+ uint8_t *enabled_cdevs)
+{
+ unsigned i, cdev_id, cdev_count, enabled_cdev_count = 0;
+ const struct rte_cryptodev_capabilities *cap;
+ enum rte_crypto_auth_algorithm cap_auth_algo;
+ enum rte_crypto_auth_algorithm opt_auth_algo;
+ enum rte_crypto_cipher_algorithm cap_cipher_algo;
+ enum rte_crypto_cipher_algorithm opt_cipher_algo;
+ int retval;
+
+ cdev_count = rte_cryptodev_count();
+ if (cdev_count == 0) {
+ printf("No crypto devices available\n");
+ return -1;
+ }
+
+ for (cdev_id = 0; cdev_id < cdev_count && enabled_cdev_count < nb_ports;
+ cdev_id++) {
+ struct rte_cryptodev_qp_conf qp_conf;
+ struct rte_cryptodev_info dev_info;
+
+ struct rte_cryptodev_config conf = {
+ .nb_queue_pairs = 1,
+ .socket_id = SOCKET_ID_ANY,
+ .session_mp = {
+ .nb_objs = 2048,
+ .cache_size = 64
+ }
+ };
+
+ rte_cryptodev_info_get(cdev_id, &dev_info);
+
+ /* Set cipher parameters */
+ if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH ||
+ options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER ||
+ options->xform_chain == L2FWD_CRYPTO_CIPHER_ONLY) {
+ /* Check if device supports cipher algo */
+ i = 0;
+ opt_cipher_algo = options->cipher_xform.cipher.algo;
+ cap = &dev_info.capabilities[i];
+ while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ cap_cipher_algo = cap->sym.cipher.algo;
+ if (cap->sym.xform_type ==
+ RTE_CRYPTO_SYM_XFORM_CIPHER) {
+ if (cap_cipher_algo == opt_cipher_algo) {
+ if (check_type(options, &dev_info) == 0)
+ break;
+ }
+ }
+ cap = &dev_info.capabilities[++i];
+ }
+
+ if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ printf("Algorithm %s not supported by cryptodev %u"
+ " or device not of preferred type (%s)\n",
+ supported_cipher_algo[opt_cipher_algo],
+ cdev_id,
+ options->string_type);
+ continue;
+ }
+
+ options->block_size = cap->sym.cipher.block_size;
+ /*
+ * Check if length of provided IV is supported
+ * by the algorithm chosen.
+ */
+ if (options->iv_param) {
+ if (check_supported_size(options->iv.length,
+ cap->sym.cipher.iv_size.min,
+ cap->sym.cipher.iv_size.max,
+ cap->sym.cipher.iv_size.increment)
+ != 0) {
+ printf("Unsupported IV length\n");
+ return -1;
+ }
+ /*
+ * Check if length of IV to be randomly generated
+ * is supported by the algorithm chosen.
+ */
+ } else if (options->iv_random_size != -1) {
+ if (check_supported_size(options->iv_random_size,
+ cap->sym.cipher.iv_size.min,
+ cap->sym.cipher.iv_size.max,
+ cap->sym.cipher.iv_size.increment)
+ != 0) {
+ printf("Unsupported IV length\n");
+ return -1;
+ }
+ options->iv.length = options->iv_random_size;
+ /* No size provided, use minimum size. */
+ } else
+ options->iv.length = cap->sym.cipher.iv_size.min;
+
+ /*
+ * Check if length of provided cipher key is supported
+ * by the algorithm chosen.
+ */
+ if (options->ckey_param) {
+ if (check_supported_size(
+ options->cipher_xform.cipher.key.length,
+ cap->sym.cipher.key_size.min,
+ cap->sym.cipher.key_size.max,
+ cap->sym.cipher.key_size.increment)
+ != 0) {
+ printf("Unsupported cipher key length\n");
+ return -1;
+ }
+ /*
+ * Check if length of the cipher key to be randomly generated
+ * is supported by the algorithm chosen.
+ */
+ } else if (options->ckey_random_size != -1) {
+ if (check_supported_size(options->ckey_random_size,
+ cap->sym.cipher.key_size.min,
+ cap->sym.cipher.key_size.max,
+ cap->sym.cipher.key_size.increment)
+ != 0) {
+ printf("Unsupported cipher key length\n");
+ return -1;
+ }
+ options->cipher_xform.cipher.key.length =
+ options->ckey_random_size;
+ /* No size provided, use minimum size. */
+ } else
+ options->cipher_xform.cipher.key.length =
+ cap->sym.cipher.key_size.min;
+
+ if (!options->ckey_param)
+ generate_random_key(
+ options->cipher_xform.cipher.key.data,
+ options->cipher_xform.cipher.key.length);
+
+ }
+
+ /* Set auth parameters */
+ if (options->xform_chain == L2FWD_CRYPTO_CIPHER_HASH ||
+ options->xform_chain == L2FWD_CRYPTO_HASH_CIPHER ||
+ options->xform_chain == L2FWD_CRYPTO_HASH_ONLY) {
+ /* Check if device supports auth algo */
+ i = 0;
+ opt_auth_algo = options->auth_xform.auth.algo;
+ cap = &dev_info.capabilities[i];
+ while (cap->op != RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ cap_auth_algo = cap->sym.auth.algo;
+ if ((cap->sym.xform_type == RTE_CRYPTO_SYM_XFORM_AUTH) &&
+ (cap_auth_algo == opt_auth_algo) &&
+ (check_type(options, &dev_info) == 0)) {
+ break;
+ }
+ cap = &dev_info.capabilities[++i];
+ }
+
+ if (cap->op == RTE_CRYPTO_OP_TYPE_UNDEFINED) {
+ printf("Algorithm %s not supported by cryptodev %u"
+ " or device not of preferred type (%s)\n",
+ supported_auth_algo[opt_auth_algo],
+ cdev_id,
+ options->string_type);
+ continue;
+ }
+
+ options->block_size = cap->sym.auth.block_size;
+ /*
+ * Check if length of provided AAD is supported
+ * by the algorithm chosen.
+ */
+ if (options->aad_param) {
+ if (check_supported_size(options->aad.length,
+ cap->sym.auth.aad_size.min,
+ cap->sym.auth.aad_size.max,
+ cap->sym.auth.aad_size.increment)
+ != 0) {
+ printf("Unsupported AAD length\n");
+ return -1;
+ }
+ /*
+ * Check if length of AAD to be randomly generated
+ * is supported by the algorithm chosen.
+ */
+ } else if (options->aad_random_size != -1) {
+ if (check_supported_size(options->aad_random_size,
+ cap->sym.auth.aad_size.min,
+ cap->sym.auth.aad_size.max,
+ cap->sym.auth.aad_size.increment)
+ != 0) {
+ printf("Unsupported AAD length\n");
+ return -1;
+ }
+ options->aad.length = options->aad_random_size;
+ /* No size provided, use minimum size. */
+ } else
+ options->aad.length = cap->sym.auth.aad_size.min;
+
+ options->auth_xform.auth.add_auth_data_length =
+ options->aad.length;
+
+ /*
+ * Check if length of provided auth key is supported
+ * by the algorithm chosen.
+ */
+ if (options->akey_param) {
+ if (check_supported_size(
+ options->auth_xform.auth.key.length,
+ cap->sym.auth.key_size.min,
+ cap->sym.auth.key_size.max,
+ cap->sym.auth.key_size.increment)
+ != 0) {
+ printf("Unsupported auth key length\n");
+ return -1;
+ }
+ /*
+ * Check if length of the auth key to be randomly generated
+ * is supported by the algorithm chosen.
+ */
+ } else if (options->akey_random_size != -1) {
+ if (check_supported_size(options->akey_random_size,
+ cap->sym.auth.key_size.min,
+ cap->sym.auth.key_size.max,
+ cap->sym.auth.key_size.increment)
+ != 0) {
+ printf("Unsupported auth key length\n");
+ return -1;
+ }
+ options->auth_xform.auth.key.length =
+ options->akey_random_size;
+ /* No size provided, use minimum size. */
+ } else
+ options->auth_xform.auth.key.length =
+ cap->sym.auth.key_size.min;
+
+ if (!options->akey_param)
+ generate_random_key(
+ options->auth_xform.auth.key.data,
+ options->auth_xform.auth.key.length);
+
+ /* Check if digest size is supported by the algorithm. */
+ if (options->digest_size != -1) {
+ if (check_supported_size(options->digest_size,
+ cap->sym.auth.digest_size.min,
+ cap->sym.auth.digest_size.max,
+ cap->sym.auth.digest_size.increment)
+ != 0) {
+ printf("Unsupported digest length\n");
+ return -1;
+ }
+ options->auth_xform.auth.digest_length =
+ options->digest_size;
+ /* No size provided, use minimum size. */
+ } else
+ options->auth_xform.auth.digest_length =
+ cap->sym.auth.digest_size.min;
+ }
+
+ retval = rte_cryptodev_configure(cdev_id, &conf);
+ if (retval < 0) {
+ printf("Failed to configure cryptodev %u", cdev_id);
+ return -1;
+ }
+
+ qp_conf.nb_descriptors = 2048;
+
+ retval = rte_cryptodev_queue_pair_setup(cdev_id, 0, &qp_conf,
+ SOCKET_ID_ANY);
+ if (retval < 0) {
+ printf("Failed to setup queue pair %u on cryptodev %u",
+ 0, cdev_id);
+ return -1;
+ }
+
+ l2fwd_enabled_crypto_mask |= (1 << cdev_id);
+
+ enabled_cdevs[cdev_id] = 1;
+ enabled_cdev_count++;
+ }
+
+ return enabled_cdev_count;
+}
+
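+/*
+ * Configure and start every Ethernet port selected by the portmask with a
+ * single RX and TX queue, and pair the ports so that traffic received on
+ * one port of a pair is forwarded out of the other.
+ */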
+static int
+initialize_ports(struct l2fwd_crypto_options *options)
+{
+ uint8_t last_portid, portid;
+ unsigned enabled_portcount = 0;
+ unsigned nb_ports = rte_eth_dev_count();
+
+ if (nb_ports == 0) {
+ printf("No Ethernet ports - bye\n");
+ return -1;
+ }
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /* Reset l2fwd_dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ l2fwd_dst_ports[portid] = 0;
+
+ for (last_portid = 0, portid = 0; portid < nb_ports; portid++) {
+ int retval;
+
+ /* Skip ports that are not enabled */
+ if ((options->portmask & (1 << portid)) == 0)
+ continue;
+
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ retval = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (retval < 0) {
+ printf("Cannot configure device: err=%d, port=%u\n",
+ retval, (unsigned) portid);
+ return -1;
+ }
+
+ /* init one RX queue */
+ fflush(stdout);
+ retval = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL, l2fwd_pktmbuf_pool);
+ if (retval < 0) {
+ printf("rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ retval, (unsigned) portid);
+ return -1;
+ }
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ retval = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (retval < 0) {
+ printf("rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ retval, (unsigned) portid);
+
+ return -1;
+ }
+
+ /* Start device */
+ retval = rte_eth_dev_start(portid);
+ if (retval < 0) {
+ printf("rte_eth_dev_start:err=%d, port=%u\n",
+ retval, (unsigned) portid);
+ return -1;
+ }
+
+ rte_eth_promiscuous_enable(portid);
+
+ rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+
+ /* Setup port forwarding table */
+ if (enabled_portcount % 2) {
+ l2fwd_dst_ports[portid] = last_portid;
+ l2fwd_dst_ports[last_portid] = portid;
+ } else {
+ last_portid = portid;
+ }
+
+ l2fwd_enabled_port_mask |= (1 << portid);
+ enabled_portcount++;
+ }
+
+ if (enabled_portcount == 1) {
+ l2fwd_dst_ports[last_portid] = last_portid;
+ } else if (enabled_portcount % 2) {
+ printf("odd number of ports in portmask- bye\n");
+ return -1;
+ }
+
+ check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
+
+ return enabled_portcount;
+}
+
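+/* Reserve DPDK memory for the cipher key, auth key, IV and AAD buffers */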
+static void
+reserve_key_memory(struct l2fwd_crypto_options *options)
+{
+ options->cipher_xform.cipher.key.data = rte_malloc("crypto key",
+ MAX_KEY_SIZE, 0);
+ if (options->cipher_xform.cipher.key.data == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to allocate memory for cipher key");
+
+
+ options->auth_xform.auth.key.data = rte_malloc("auth key",
+ MAX_KEY_SIZE, 0);
+ if (options->auth_xform.auth.key.data == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to allocate memory for auth key");
+
+ options->iv.data = rte_malloc("iv", MAX_KEY_SIZE, 0);
+ if (options->iv.data == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to allocate memory for IV");
+ options->iv.phys_addr = rte_malloc_virt2phy(options->iv.data);
+
+ options->aad.data = rte_malloc("aad", MAX_KEY_SIZE, 0);
+ if (options->aad.data == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to allocate memory for AAD");
+ options->aad.phys_addr = rte_malloc_virt2phy(options->aad.data);
+}
+
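+/*
+ * Application entry point: initialize the EAL, parse the application
+ * options, create the mbuf and crypto-op pools, bring up the Ethernet
+ * ports and crypto devices, distribute them across the enabled lcores
+ * and launch the forwarding loop on every lcore.
+ */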
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct l2fwd_crypto_options options;
+
+ uint8_t nb_ports, nb_cryptodevs, portid, cdev_id;
+ unsigned lcore_id, rx_lcore_id;
+ int ret, enabled_cdevcount, enabled_portcount;
+ uint8_t enabled_cdevs[RTE_CRYPTO_MAX_DEVS] = {0};
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* reserve memory for Cipher/Auth key and IV */
+ reserve_key_memory(&options);
+
+ /* fill out the supported algorithm tables */
+ fill_supported_algorithm_tables();
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_crypto_parse_args(&options, argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2FWD-CRYPTO arguments\n");
+
+ /* create the mbuf pool */
+ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 512,
+ sizeof(struct rte_crypto_op),
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (l2fwd_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* create crypto op pool */
+ l2fwd_crypto_op_pool = rte_crypto_op_pool_create("crypto_op_pool",
+ RTE_CRYPTO_OP_TYPE_SYMMETRIC, NB_MBUF, 128, 0,
+ rte_socket_id());
+ if (l2fwd_crypto_op_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create crypto op pool\n");
+
+ /* Enable Ethernet ports */
+ enabled_portcount = initialize_ports(&options);
+ if (enabled_portcount < 1)
+ rte_exit(EXIT_FAILURE, "Failed to initial Ethernet ports\n");
+
+ nb_ports = rte_eth_dev_count();
+ /* Initialize the port/queue configuration of each logical core */
+ for (rx_lcore_id = 0, qconf = NULL, portid = 0;
+ portid < nb_ports; portid++) {
+
+ /* skip ports that are not enabled */
+ if ((options.portmask & (1 << portid)) == 0)
+ continue;
+
+ if (options.single_lcore && qconf == NULL) {
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,
+ "Not enough cores\n");
+ }
+ } else if (!options.single_lcore) {
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].nb_rx_ports ==
+ options.nb_ports_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,
+ "Not enough cores\n");
+ }
+ }
+
+ /* Assigned a new logical core in the loop above. */
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->nb_rx_ports] = portid;
+ qconf->nb_rx_ports++;
+
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned)portid);
+ }
+
+ /* Enable Crypto devices */
+ enabled_cdevcount = initialize_cryptodevs(&options, enabled_portcount,
+ enabled_cdevs);
+ if (enabled_cdevcount < 0)
+ rte_exit(EXIT_FAILURE, "Failed to initialize crypto devices\n");
+
+ if (enabled_cdevcount < enabled_portcount)
+ rte_exit(EXIT_FAILURE, "Number of capable crypto devices (%d) "
+ "has to be more or equal to number of ports (%d)\n",
+ enabled_cdevcount, enabled_portcount);
+
+ nb_cryptodevs = rte_cryptodev_count();
+
+ /* Initialize the port/cryptodev configuration of each logical core */
+ for (rx_lcore_id = 0, qconf = NULL, cdev_id = 0;
+ cdev_id < nb_cryptodevs && enabled_cdevcount;
+ cdev_id++) {
+		/* Skip crypto devices that were not enabled */
+ if (!enabled_cdevs[cdev_id])
+ continue;
+
+ if (options.single_lcore && qconf == NULL) {
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,
+ "Not enough cores\n");
+ }
+ } else if (!options.single_lcore) {
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].nb_crypto_devs ==
+ options.nb_ports_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,
+ "Not enough cores\n");
+ }
+ }
+
+ /* Assigned a new logical core in the loop above. */
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->cryptodev_list[qconf->nb_crypto_devs] = cdev_id;
+ qconf->nb_crypto_devs++;
+
+ enabled_cdevcount--;
+
+ printf("Lcore %u: cryptodev %u\n", rx_lcore_id,
+ (unsigned)cdev_id);
+ }
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, (void *)&options,
+ CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/l2fwd-ivshmem/Makefile b/examples/l2fwd-ivshmem/Makefile
new file mode 100644
index 00000000..5f1d1728
--- /dev/null
+++ b/examples/l2fwd-ivshmem/Makefile
@@ -0,0 +1,43 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += host guest
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/l2fwd-ivshmem/guest/Makefile b/examples/l2fwd-ivshmem/guest/Makefile
new file mode 100644
index 00000000..3ca73b43
--- /dev/null
+++ b/examples/l2fwd-ivshmem/guest/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = guest
+
+# all source are stored in SRCS-y
+SRCS-y := guest.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-ivshmem/guest/guest.c b/examples/l2fwd-ivshmem/guest/guest.c
new file mode 100644
index 00000000..7c49521b
--- /dev/null
+++ b/examples/l2fwd-ivshmem/guest/guest.c
@@ -0,0 +1,452 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/queue.h>
+#include <sys/file.h>
+#include <unistd.h>
+#include <limits.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include <rte_common.h>
+#include <rte_eal_memconfig.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ivshmem.h>
+
+#include "../include/common.h"
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+ struct mbuf_table rx_mbufs[RTE_MAX_ETHPORTS];
+ struct vm_port_param * port_param[MAX_RX_QUEUE_PER_LCORE];
+} __rte_cache_aligned;
+static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < ctrl->nb_ports; portid++) {
+		/* print per-port statistics from the shared control structure */
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ ctrl->vm_ports[portid].stats.tx,
+ ctrl->vm_ports[portid].stats.rx,
+ ctrl->vm_ports[portid].stats.dropped);
+
+ total_packets_dropped += ctrl->vm_ports[portid].stats.dropped;
+ total_packets_tx += ctrl->vm_ports[portid].stats.tx;
+ total_packets_rx += ctrl->vm_ports[portid].stats.rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+ printf("\n====================================================\n");
+}
+
+/* display usage */
+static void
+l2fwd_ivshmem_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- [-q NQ -T PERIOD]\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static unsigned int
+l2fwd_ivshmem_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_ivshmem_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_ivshmem_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "q:p:T:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+
+ /* nqueue */
+ case 'q':
+ l2fwd_ivshmem_rx_queue_per_lcore = l2fwd_ivshmem_parse_nqueue(optarg);
+ if (l2fwd_ivshmem_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_ivshmem_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND;
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/*
+ * This loop gets packets from the RX ring of each port and puts them
+ * into the TX rings of the destination ports.
+ */
+static void
+fwd_loop(void)
+{
+
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf **m_table;
+ struct rte_mbuf *m;
+ struct rte_ring *rx, *tx;
+ unsigned lcore_id, len;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ struct ether_hdr *eth;
+ void *tmp;
+
+ prev_tsc = 0;
+ timer_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD_IVSHMEM, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD_IVSHMEM, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD_IVSHMEM, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ while (ctrl->state == STATE_FWD) {
+ cur_tsc = rte_rdtsc();
+
+ diff_tsc = cur_tsc - prev_tsc;
+
+ /*
+ * Read packet from RX queues and send it to TX queues
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+
+ len = qconf->rx_mbufs[portid].len;
+
+ rx = ctrl->vm_ports[portid].rx_ring;
+ tx = ctrl->vm_ports[portid].dst->tx_ring;
+
+ m_table = qconf->rx_mbufs[portid].m_table;
+
+ /* if we have something in the queue, try and transmit it down */
+ if (len != 0) {
+
+ /* if we succeed in sending the packets down, mark queue as free */
+ if (rte_ring_enqueue_bulk(tx, (void**) m_table, len) == 0) {
+ ctrl->vm_ports[portid].stats.tx += len;
+ qconf->rx_mbufs[portid].len = 0;
+ len = 0;
+ }
+ }
+
+ nb_rx = rte_ring_count(rx);
+
+ nb_rx = RTE_MIN(nb_rx, (unsigned) MAX_PKT_BURST);
+
+ if (nb_rx == 0)
+ continue;
+
+ /* if we can get packets into the m_table */
+ if (nb_rx < (RTE_DIM(qconf->rx_mbufs[portid].m_table) - len)) {
+
+				/* nb_rx was taken from rte_ring_count(), so a failed
+				 * bulk dequeue means something went horribly wrong;
+				 * flag the failure and stop forwarding. */
+ if (rte_ring_dequeue_bulk(rx, (void**) pkts_burst, nb_rx) < 0) {
+ ctrl->state = STATE_FAIL;
+ return;
+ }
+
+ ctrl->vm_ports[portid].stats.rx += nb_rx;
+
+ /* put packets into the queue */
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+
+ m_table[len + j] = m;
+
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)portid << 40);
+
+ /* src addr */
+ ether_addr_copy(&ctrl->vm_ports[portid].dst->ethaddr,
+ &eth->s_addr);
+ }
+ qconf->rx_mbufs[portid].len += nb_rx;
+
+ }
+
+ }
+
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
+
+ /* do this only on master core */
+ if (lcore_id == rte_get_master_lcore()) {
+ print_stats();
+ /* reset the timer */
+ timer_tsc = 0;
+ }
+ }
+ }
+
+ prev_tsc = cur_tsc;
+ }
+}
+
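+/* Per-lcore entry point for the guest: run the ring-based forwarding loop */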
+static int
+l2fwd_ivshmem_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ fwd_loop();
+ return 0;
+}
+
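+/*
+ * Guest entry point: initialize the EAL, locate the shared control memzone
+ * set up by the host, assign ports to lcores and start forwarding between
+ * the ivshmem rings.
+ */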
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ const struct rte_memzone * mz;
+ int ret;
+ uint8_t portid;
+ unsigned rx_lcore_id, lcore_id;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_ivshmem_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid l2fwd-ivshmem arguments\n");
+
+ /* find control structure */
+ mz = rte_memzone_lookup(CTRL_MZ_NAME);
+ if (mz == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot find control memzone\n");
+
+ ctrl = (struct ivshmem_ctrl*) mz->addr;
+
+ /* lock the ctrl so that we don't have conflicts with anything else */
+ rte_spinlock_lock(&ctrl->lock);
+
+ if (ctrl->state == STATE_FWD)
+ rte_exit(EXIT_FAILURE, "Forwarding already started!\n");
+
+ rx_lcore_id = 0;
+ qconf = NULL;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < ctrl->nb_ports; portid++) {
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_ivshmem_rx_queue_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->port_param[qconf->n_rx_port] = &ctrl->vm_ports[portid];
+ qconf->n_rx_port++;
+
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
+ }
+
+ sigsetup();
+
+ /* indicate that we are ready to forward */
+ ctrl->state = STATE_FWD;
+
+ /* unlock */
+ rte_spinlock_unlock(&ctrl->lock);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l2fwd_ivshmem_launch_one_lcore, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/l2fwd-ivshmem/host/Makefile b/examples/l2fwd-ivshmem/host/Makefile
new file mode 100644
index 00000000..f91419e9
--- /dev/null
+++ b/examples/l2fwd-ivshmem/host/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = host
+
+# all source are stored in SRCS-y
+SRCS-y := host.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-ivshmem/host/host.c b/examples/l2fwd-ivshmem/host/host.c
new file mode 100644
index 00000000..4bd7c41d
--- /dev/null
+++ b/examples/l2fwd-ivshmem/host/host.c
@@ -0,0 +1,897 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <getopt.h>
+#include <signal.h>
+
+#include <rte_eal.h>
+#include <rte_cycles.h>
+#include <rte_eal_memconfig.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+#include <rte_ivshmem.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#include "../include/common.h"
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* mask of enabled ports */
+static uint32_t l2fwd_ivshmem_enabled_port_mask = 0;
+
+static struct ether_addr l2fwd_ivshmem_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+#define NB_MBUF 8192
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+ struct vm_port_param * port_param[MAX_RX_QUEUE_PER_LCORE];
+ struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+ struct mbuf_table rx_mbufs[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+static struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+#define METADATA_NAME "l2fwd_ivshmem"
+#define CMDLINE_OPT_FWD_CONF "fwd-conf"
+
+#define QEMU_CMD_FMT "/tmp/ivshmem_qemu_cmdline_%s"
+
+struct port_statistics port_statistics[RTE_MAX_ETHPORTS];
+
+struct rte_mempool * l2fwd_ivshmem_pktmbuf_pool = NULL;
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ uint64_t total_vm_packets_dropped, total_vm_packets_tx, total_vm_packets_rx;
+ unsigned portid;
+
+	total_packets_dropped = 0;
+	total_packets_tx = 0;
+	total_packets_rx = 0;
+	total_vm_packets_dropped = 0;
+	total_vm_packets_tx = 0;
+	total_vm_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+
+ printf("\nVM statistics ======================================");
+ for (portid = 0; portid < ctrl->nb_ports; portid++) {
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64,
+ portid,
+ ctrl->vm_ports[portid].stats.tx,
+ ctrl->vm_ports[portid].stats.rx);
+
+ total_vm_packets_dropped += ctrl->vm_ports[portid].stats.dropped;
+ total_vm_packets_tx += ctrl->vm_ports[portid].stats.tx;
+ total_vm_packets_rx += ctrl->vm_ports[portid].stats.rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64
+ "\nTotal VM packets sent: %15"PRIu64
+ "\nTotal VM packets received: %11"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped,
+ total_vm_packets_tx,
+ total_vm_packets_rx);
+ printf("\n====================================================\n");
+}
+
+static int
+print_to_file(const char *cmdline, const char *config_name)
+{
+ FILE *file;
+ char path[PATH_MAX];
+
+ snprintf(path, sizeof(path), QEMU_CMD_FMT, config_name);
+ file = fopen(path, "w");
+ if (file == NULL) {
+ RTE_LOG(ERR, L2FWD_IVSHMEM, "Could not open '%s' \n", path);
+ return -1;
+ }
+
+ RTE_LOG(DEBUG, L2FWD_IVSHMEM, "QEMU command line for config '%s': %s \n",
+ config_name, cmdline);
+
+ fprintf(file, "%s\n", cmdline);
+ fclose(file);
+ return 0;
+}
+
+static int
+generate_ivshmem_cmdline(const char *config_name)
+{
+ char cmdline[PATH_MAX];
+ if (rte_ivshmem_metadata_cmdline_generate(cmdline, sizeof(cmdline),
+ config_name) < 0)
+ return -1;
+
+ if (print_to_file(cmdline, config_name) < 0)
+ return -1;
+
+ rte_ivshmem_metadata_dump(stdout, config_name);
+ return 0;
+}
+
+/* display usage */
+static void
+l2fwd_ivshmem_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ -T PERIOD]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+	       " -q NQ: number of queues (=ports) per lcore (default is 1)\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds "
+ "(0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static unsigned int
+l2fwd_ivshmem_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_ivshmem_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+l2fwd_ivshmem_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_ivshmem_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {CMDLINE_OPT_FWD_CONF, 1, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "q:p:T:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ l2fwd_ivshmem_enabled_port_mask = l2fwd_ivshmem_parse_portmask(optarg);
+ if (l2fwd_ivshmem_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ l2fwd_ivshmem_rx_queue_per_lcore = l2fwd_ivshmem_parse_nqueue(optarg);
+ if (l2fwd_ivshmem_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_ivshmem_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND;
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_ivshmem_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Check the link status of all ports in up to 9s, and print the status at the end */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+					("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/* Send the burst of packets on an output interface */
+static int
+l2fwd_ivshmem_send_burst(struct lcore_queue_conf *qconf, unsigned n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ unsigned ret;
+	unsigned queueid = 0;
+
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ ret = rte_eth_tx_burst(port, (uint16_t) queueid, m_table, (uint16_t) n);
+ port_statistics[port].tx += ret;
+ if (unlikely(ret < n)) {
+ port_statistics[port].dropped += (n - ret);
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue packets for TX and prepare them to be sent on the network */
+static int
+l2fwd_ivshmem_send_packet(struct rte_mbuf *m, uint8_t port)
+{
+ unsigned lcore_id, len;
+ struct lcore_queue_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_queue_conf[lcore_id];
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ l2fwd_ivshmem_send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+static int
+l2fwd_ivshmem_receive_burst(struct lcore_queue_conf *qconf, unsigned portid,
+ unsigned vm_port)
+{
+ struct rte_mbuf ** m;
+ struct rte_ring * rx;
+ unsigned len, pkt_idx;
+
+ m = qconf->rx_mbufs[portid].m_table;
+ len = qconf->rx_mbufs[portid].len;
+ rx = qconf->port_param[vm_port]->rx_ring;
+
+ /* if enqueueing failed, ring is probably full, so drop the packets */
+ if (rte_ring_enqueue_bulk(rx, (void**) m, len) < 0) {
+ port_statistics[portid].dropped += len;
+
+ pkt_idx = 0;
+ do {
+ rte_pktmbuf_free(m[pkt_idx]);
+ } while (++pkt_idx < len);
+ }
+ else
+ /* increment rx stats by however many packets we managed to receive */
+ port_statistics[portid].rx += len;
+
+ return 0;
+}
+
+/* Enqueue packets for RX and prepare them to be sent to VM */
+static int
+l2fwd_ivshmem_receive_packets(struct rte_mbuf ** m, unsigned n, unsigned portid,
+ unsigned vm_port)
+{
+ unsigned lcore_id, len, pkt_idx;
+ struct lcore_queue_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_queue_conf[lcore_id];
+
+ len = qconf->rx_mbufs[portid].len;
+ pkt_idx = 0;
+
+ /* enqueue packets */
+ while (pkt_idx < n && len < MAX_PKT_BURST * 2) {
+ qconf->rx_mbufs[portid].m_table[len++] = m[pkt_idx++];
+ }
+
+ /* increment queue len by however many packets we managed to receive */
+ qconf->rx_mbufs[portid].len += pkt_idx;
+
+ /* drop the unreceived packets */
+ if (unlikely(pkt_idx < n)) {
+ port_statistics[portid].dropped += n - pkt_idx;
+ do {
+ rte_pktmbuf_free(m[pkt_idx]);
+ } while (++pkt_idx < n);
+ }
+
+ /* drain the queue halfway through the maximum capacity */
+ if (unlikely(qconf->rx_mbufs[portid].len >= MAX_PKT_BURST))
+ l2fwd_ivshmem_receive_burst(qconf, portid, vm_port);
+
+ return 0;
+}
+
+/* loop for host forwarding mode.
+ * the data flow is as follows:
+ *  1) get packets from TX queue and send them out from a given port
+ * 2) RX packets from given port and enqueue them on RX ring
+ * 3) dequeue packets from TX ring and put them on TX queue for a given port
+ */
+static void
+fwd_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST * 2];
+ struct rte_mbuf *m;
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ struct rte_ring *tx;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+ timer_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD_IVSHMEM, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD_IVSHMEM, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD_IVSHMEM, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ while (ctrl->state == STATE_FWD) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * Burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ /*
+ * TX
+ */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (qconf->tx_mbufs[portid].len == 0)
+ continue;
+ l2fwd_ivshmem_send_burst(qconf,
+ qconf->tx_mbufs[portid].len,
+ (uint8_t) portid);
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ /*
+ * RX
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ portid = qconf->rx_port_list[i];
+ if (qconf->rx_mbufs[portid].len == 0)
+ continue;
+ l2fwd_ivshmem_receive_burst(qconf, portid, i);
+ qconf->rx_mbufs[portid].len = 0;
+ }
+
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
+
+ /* do this only on master core */
+ if (lcore_id == rte_get_master_lcore()) {
+ print_stats();
+ /* reset the timer */
+ timer_tsc = 0;
+ }
+ }
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * packet RX and forwarding
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ /* RX packets from port and put them on RX ring */
+ portid = qconf->rx_port_list[i];
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ if (nb_rx != 0)
+ l2fwd_ivshmem_receive_packets(pkts_burst, nb_rx, portid, i);
+
+ /* dequeue packets from TX ring and send them to TX queue */
+ tx = qconf->port_param[i]->tx_ring;
+
+ nb_rx = rte_ring_count(tx);
+
+ nb_rx = RTE_MIN(nb_rx, (unsigned) MAX_PKT_BURST);
+
+ if (nb_rx == 0)
+ continue;
+
+ /* should not happen */
+ if (unlikely(rte_ring_dequeue_bulk(tx, (void**) pkts_burst, nb_rx) < 0)) {
+ ctrl->state = STATE_FAIL;
+ return;
+ }
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ l2fwd_ivshmem_send_packet(m, portid);
+ }
+ }
+ }
+}
+
+static int
+l2fwd_ivshmem_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ fwd_loop();
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ char name[RTE_RING_NAMESIZE];
+ struct rte_ring *r;
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ uint8_t portid, port_nr;
+ uint8_t nb_ports, nb_ports_available;
+ uint8_t nb_ports_in_mask;
+ int ret;
+ unsigned lcore_id, rx_lcore_id;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_ivshmem_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid l2fwd-ivshmem arguments\n");
+
+ /* create a shared mbuf pool */
+ l2fwd_ivshmem_pktmbuf_pool =
+ rte_pktmbuf_pool_create(MBUF_MP_NAME, NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (l2fwd_ivshmem_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+ * reserve memzone to communicate with VMs - we cannot use rte_malloc here
+ * because while it is technically possible, it is a very bad idea to share
+ * the heap between two primary processes.
+ */
+ ctrl_mz = rte_memzone_reserve(CTRL_MZ_NAME, sizeof(struct ivshmem_ctrl),
+ SOCKET_ID_ANY, 0);
+ if (ctrl_mz == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot reserve control memzone\n");
+ ctrl = (struct ivshmem_ctrl*) ctrl_mz->addr;
+
+ memset(ctrl, 0, sizeof(struct ivshmem_ctrl));
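+
+	/*
+	 * Guest-side counterpart (see the VM code earlier in this patch): the
+	 * guest attaches to this same structure by name, which is why a named
+	 * memzone is used here rather than heap memory:
+	 *
+	 *	mz = rte_memzone_lookup(CTRL_MZ_NAME);
+	 *	if (mz == NULL)
+	 *		rte_exit(EXIT_FAILURE, "Cannot find control memzone\n");
+	 *	ctrl = (struct ivshmem_ctrl *) mz->addr;
+	 */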
+
+ /*
+ * Each port is assigned an output port.
+ */
+ nb_ports_in_mask = 0;
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ if (portid % 2) {
+ ctrl->vm_ports[nb_ports_in_mask].dst = &ctrl->vm_ports[nb_ports_in_mask-1];
+ ctrl->vm_ports[nb_ports_in_mask-1].dst = &ctrl->vm_ports[nb_ports_in_mask];
+ }
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ ctrl->vm_ports[nb_ports_in_mask-1].dst =
+ &ctrl->vm_ports[nb_ports_in_mask-1];
+ }
+
+ rx_lcore_id = 0;
+ qconf = NULL;
+
+ printf("Initializing ports configuration...\n");
+
+ nb_ports_available = nb_ports;
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+
+ /* skip ports that are not enabled */
+ if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n", (unsigned) portid);
+ nb_ports_available--;
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_macaddr_get(portid,&l2fwd_ivshmem_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ l2fwd_ivshmem_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ printf("done: \n");
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ivshmem_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+ port_nr = 0;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((l2fwd_ivshmem_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_ivshmem_rx_queue_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+
+ rte_eth_macaddr_get(portid, &ctrl->vm_ports[port_nr].ethaddr);
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->port_param[qconf->n_rx_port] = &ctrl->vm_ports[port_nr];
+ qconf->n_rx_port++;
+ port_nr++;
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
+ }
+
+ check_all_ports_link_status(nb_ports_available, l2fwd_ivshmem_enabled_port_mask);
+
+ /* create rings for each VM port (several ports can be on the same VM).
+ * note that we store the pointers in ctrl - that way, they are the same
+ * and valid across all VMs because ctrl is also in DPDK memory */
+ for (portid = 0; portid < nb_ports_available; portid++) {
+
+ /* RX ring. SP/SC because it's only used by host and a single VM */
+ snprintf(name, sizeof(name), "%s%i", RX_RING_PREFIX, portid);
+ r = rte_ring_create(name, NB_MBUF,
+ SOCKET_ID_ANY, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (r == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create ring %s\n", name);
+
+ ctrl->vm_ports[portid].rx_ring = r;
+
+ /* TX ring. SP/SC because it's only used by host and a single VM */
+ snprintf(name, sizeof(name), "%s%i", TX_RING_PREFIX, portid);
+ r = rte_ring_create(name, NB_MBUF,
+ SOCKET_ID_ANY, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (r == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create ring %s\n", name);
+
+ ctrl->vm_ports[portid].tx_ring = r;
+ }
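+
+	/*
+	 * Usage sketch (error handling omitted): fwd_loop() above enqueues
+	 * received packets onto rx_ring and drains tx_ring, while the guest
+	 * roughly does the mirror image:
+	 *
+	 *	nb = RTE_MIN(rte_ring_count(port->rx_ring), (unsigned) MAX_PKT_BURST);
+	 *	if (nb != 0 &&
+	 *	    rte_ring_dequeue_bulk(port->rx_ring, (void **) burst, nb) == 0)
+	 *		rte_ring_enqueue_bulk(port->dst->tx_ring, (void **) burst, nb);
+	 */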
+
+ /* create metadata, output cmdline */
+ if (rte_ivshmem_metadata_create(METADATA_NAME) < 0)
+ rte_exit(EXIT_FAILURE, "Cannot create IVSHMEM metadata\n");
+
+ if (rte_ivshmem_metadata_add_memzone(ctrl_mz, METADATA_NAME))
+ rte_exit(EXIT_FAILURE, "Cannot add memzone to IVSHMEM metadata\n");
+
+ if (rte_ivshmem_metadata_add_mempool(l2fwd_ivshmem_pktmbuf_pool, METADATA_NAME))
+ rte_exit(EXIT_FAILURE, "Cannot add mbuf mempool to IVSHMEM metadata\n");
+
+ for (portid = 0; portid < nb_ports_available; portid++) {
+ if (rte_ivshmem_metadata_add_ring(ctrl->vm_ports[portid].rx_ring,
+ METADATA_NAME) < 0)
+ rte_exit(EXIT_FAILURE, "Cannot add ring %s to IVSHMEM metadata\n",
+ ctrl->vm_ports[portid].rx_ring->name);
+ if (rte_ivshmem_metadata_add_ring(ctrl->vm_ports[portid].tx_ring,
+ METADATA_NAME) < 0)
+ rte_exit(EXIT_FAILURE, "Cannot add ring %s to IVSHMEM metadata\n",
+ ctrl->vm_ports[portid].tx_ring->name);
+ }
+ generate_ivshmem_cmdline(METADATA_NAME);
+
+ ctrl->nb_ports = nb_ports_available;
+
+ printf("Waiting for VM to initialize...\n");
+
+ /* wait for VM to initialize */
+ while (ctrl->state != STATE_FWD) {
+ if (ctrl->state == STATE_FAIL)
+ rte_exit(EXIT_FAILURE, "VM reported failure\n");
+
+ sleep(1);
+ }
+
+ printf("Done!\n");
+
+ sigsetup();
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l2fwd_ivshmem_launch_one_lcore, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ if (ctrl->state == STATE_FAIL)
+ rte_exit(EXIT_FAILURE, "VM reported failure\n");
+
+ return 0;
+}
diff --git a/examples/l2fwd-ivshmem/include/common.h b/examples/l2fwd-ivshmem/include/common.h
new file mode 100644
index 00000000..8564d32b
--- /dev/null
+++ b/examples/l2fwd-ivshmem/include/common.h
@@ -0,0 +1,111 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _IVSHMEM_COMMON_H_
+#define _IVSHMEM_COMMON_H_
+
+#define RTE_LOGTYPE_L2FWD_IVSHMEM RTE_LOGTYPE_USER1
+
+#define CTRL_MZ_NAME "CTRL_MEMZONE"
+#define MBUF_MP_NAME "MBUF_MEMPOOL"
+#define RX_RING_PREFIX "RX_"
+#define TX_RING_PREFIX "TX_"
+
+/* A tsc-based timer responsible for triggering statistics printout */
+#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 GHz */
+#define MAX_TIMER_PERIOD 86400 /* 1 day max */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */
+
+#define DIM(x)\
+ (sizeof(x)/sizeof(x)[0])
+
+#define MAX_PKT_BURST 32
+
+const struct rte_memzone * ctrl_mz;
+
+enum l2fwd_state {
+ STATE_NONE = 0,
+ STATE_FWD,
+ STATE_EXIT,
+ STATE_FAIL
+};
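+
+/*
+ * Handshake between host and VM: the host reserves the control memzone
+ * (the state starts out as STATE_NONE) and waits until the VM, holding
+ * ctrl->lock, switches it to STATE_FWD; forwarding then runs for as long
+ * as the state stays STATE_FWD. SIGINT moves the state to STATE_EXIT, and
+ * STATE_FAIL reports an unrecoverable error to the other side.
+ */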
+
+/* Per-port statistics struct */
+struct port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+
+struct mbuf_table {
+ unsigned len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST * 2]; /**< allow up to two bursts */
+};
+
+struct vm_port_param {
+ struct rte_ring * rx_ring; /**< receiving ring for current port */
+ struct rte_ring * tx_ring; /**< transmitting ring for current port */
+ struct vm_port_param * dst; /**< current port's destination port */
+ volatile struct port_statistics stats; /**< statistics for current port */
+ struct ether_addr ethaddr; /**< Ethernet address of the port */
+};
+
+/* control structure, to synchronize host and VM */
+struct ivshmem_ctrl {
+ rte_spinlock_t lock;
+ uint8_t nb_ports; /**< total nr of ports */
+ volatile enum l2fwd_state state; /**< report state */
+ struct vm_port_param vm_ports[RTE_MAX_ETHPORTS];
+};
+
+struct ivshmem_ctrl * ctrl;
+
+static unsigned int l2fwd_ivshmem_rx_queue_per_lcore = 1;
+
+static void sighandler(int __rte_unused s)
+{
+ ctrl->state = STATE_EXIT;
+}
+
+static void sigsetup(void)
+{
+ struct sigaction sigIntHandler;
+
+ sigIntHandler.sa_handler = sighandler;
+ sigemptyset(&sigIntHandler.sa_mask);
+ sigIntHandler.sa_flags = 0;
+
+ sigaction(SIGINT, &sigIntHandler, NULL);
+}
+
+#endif /* _IVSHMEM_COMMON_H_ */
diff --git a/examples/l2fwd-jobstats/Makefile b/examples/l2fwd-jobstats/Makefile
new file mode 100644
index 00000000..ab089f66
--- /dev/null
+++ b/examples/l2fwd-jobstats/Makefile
@@ -0,0 +1,51 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd-jobstats
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-jobstats/main.c b/examples/l2fwd-jobstats/main.c
new file mode 100644
index 00000000..9f3a77d2
--- /dev/null
+++ b/examples/l2fwd-jobstats/main.c
@@ -0,0 +1,1026 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <locale.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <ctype.h>
+#include <getopt.h>
+
+#include <rte_alarm.h>
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_spinlock.h>
+
+#include <rte_errno.h>
+#include <rte_jobstats.h>
+#include <rte_timer.h>
+#include <rte_alarm.h>
+
+#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
+
+#define NB_MBUF 8192
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t l2fwd_enabled_port_mask;
+
+/* list of enabled ports */
+static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
+
+#define UPDATE_STEP_UP 1
+#define UPDATE_STEP_DOWN 32
+
+static unsigned int l2fwd_rx_queue_per_lcore = 1;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+ uint64_t next_flush_time[RTE_MAX_ETHPORTS];
+
+ struct rte_timer rx_timers[MAX_RX_QUEUE_PER_LCORE];
+ struct rte_jobstats port_fwd_jobs[MAX_RX_QUEUE_PER_LCORE];
+
+ struct rte_timer flush_timer;
+ struct rte_jobstats flush_job;
+ struct rte_jobstats idle_job;
+ struct rte_jobstats_context jobs_context;
+
+ rte_atomic16_t stats_read_pending;
+ rte_spinlock_t lock;
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+struct rte_mempool *l2fwd_pktmbuf_pool = NULL;
+
+/* Per-port statistics struct */
+struct l2fwd_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
+
+/* 1 day max */
+#define MAX_TIMER_PERIOD 86400
+/* default period is 10 seconds */
+static int64_t timer_period = 10;
+/* default timer frequency */
+static double hz;
+/* BURST_TX_DRAIN_US converted to cycles */
+uint64_t drain_tsc;
+/* Convert cycles to ns */
+static inline double
+cycles_to_ns(uint64_t cycles)
+{
+ double t = cycles;
+
+ t *= (double)NS_PER_S;
+ t /= hz;
+ return t;
+}
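+
+/* For example, with hz == 2e9 (a 2 GHz timer), cycles_to_ns(3000) is 1500 ns. */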
+
+static void
+show_lcore_stats(unsigned lcore_id)
+{
+ struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
+ struct rte_jobstats_context *ctx = &qconf->jobs_context;
+ struct rte_jobstats *job;
+ uint8_t i;
+
+ /* LCore statistics. */
+ uint64_t stats_period, loop_count;
+ uint64_t exec, exec_min, exec_max;
+ uint64_t management, management_min, management_max;
+ uint64_t busy, busy_min, busy_max;
+
+ /* Jobs statistics. */
+ const uint8_t port_cnt = qconf->n_rx_port;
+ uint64_t jobs_exec_cnt[port_cnt], jobs_period[port_cnt];
+ uint64_t jobs_exec[port_cnt], jobs_exec_min[port_cnt],
+ jobs_exec_max[port_cnt];
+
+ uint64_t flush_exec_cnt, flush_period;
+ uint64_t flush_exec, flush_exec_min, flush_exec_max;
+
+ uint64_t idle_exec_cnt;
+ uint64_t idle_exec, idle_exec_min, idle_exec_max;
+ uint64_t collection_time = rte_get_timer_cycles();
+
+ /* Ask forwarding thread to give us stats. */
+ rte_atomic16_set(&qconf->stats_read_pending, 1);
+ rte_spinlock_lock(&qconf->lock);
+ rte_atomic16_set(&qconf->stats_read_pending, 0);
+
+ /* Collect context statistics. */
+ stats_period = ctx->state_time - ctx->start_time;
+ loop_count = ctx->loop_cnt;
+
+ exec = ctx->exec_time;
+ exec_min = ctx->min_exec_time;
+ exec_max = ctx->max_exec_time;
+
+ management = ctx->management_time;
+ management_min = ctx->min_management_time;
+ management_max = ctx->max_management_time;
+
+ rte_jobstats_context_reset(ctx);
+
+ for (i = 0; i < port_cnt; i++) {
+ job = &qconf->port_fwd_jobs[i];
+
+ jobs_exec_cnt[i] = job->exec_cnt;
+ jobs_period[i] = job->period;
+
+ jobs_exec[i] = job->exec_time;
+ jobs_exec_min[i] = job->min_exec_time;
+ jobs_exec_max[i] = job->max_exec_time;
+
+ rte_jobstats_reset(job);
+ }
+
+ flush_exec_cnt = qconf->flush_job.exec_cnt;
+ flush_period = qconf->flush_job.period;
+ flush_exec = qconf->flush_job.exec_time;
+ flush_exec_min = qconf->flush_job.min_exec_time;
+ flush_exec_max = qconf->flush_job.max_exec_time;
+ rte_jobstats_reset(&qconf->flush_job);
+
+ idle_exec_cnt = qconf->idle_job.exec_cnt;
+ idle_exec = qconf->idle_job.exec_time;
+ idle_exec_min = qconf->idle_job.min_exec_time;
+ idle_exec_max = qconf->idle_job.max_exec_time;
+ rte_jobstats_reset(&qconf->idle_job);
+
+ rte_spinlock_unlock(&qconf->lock);
+
+ exec -= idle_exec;
+ busy = exec + management;
+ busy_min = exec_min + management_min;
+ busy_max = exec_max + management_max;
+
+
+ collection_time = rte_get_timer_cycles() - collection_time;
+
+#define STAT_FMT "\n%-18s %'14.0f %6.1f%% %'10.0f %'10.0f %'10.0f"
+
+ printf("\n----------------"
+ "\nLCore %3u: statistics (time in ns, collected in %'9.0f)"
+ "\n%-18s %14s %7s %10s %10s %10s "
+ "\n%-18s %'14.0f"
+ "\n%-18s %'14" PRIu64
+ STAT_FMT /* Exec */
+ STAT_FMT /* Management */
+ STAT_FMT /* Busy */
+ STAT_FMT, /* Idle */
+ lcore_id, cycles_to_ns(collection_time),
+ "Stat type", "total", "%total", "avg", "min", "max",
+ "Stats duration:", cycles_to_ns(stats_period),
+ "Loop count:", loop_count,
+ "Exec time",
+ cycles_to_ns(exec), exec * 100.0 / stats_period,
+ cycles_to_ns(loop_count ? exec / loop_count : 0),
+ cycles_to_ns(exec_min),
+ cycles_to_ns(exec_max),
+ "Management time",
+ cycles_to_ns(management), management * 100.0 / stats_period,
+ cycles_to_ns(loop_count ? management / loop_count : 0),
+ cycles_to_ns(management_min),
+ cycles_to_ns(management_max),
+ "Exec + management",
+ cycles_to_ns(busy), busy * 100.0 / stats_period,
+ cycles_to_ns(loop_count ? busy / loop_count : 0),
+ cycles_to_ns(busy_min),
+ cycles_to_ns(busy_max),
+ "Idle (job)",
+ cycles_to_ns(idle_exec), idle_exec * 100.0 / stats_period,
+ cycles_to_ns(idle_exec_cnt ? idle_exec / idle_exec_cnt : 0),
+ cycles_to_ns(idle_exec_min),
+ cycles_to_ns(idle_exec_max));
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ job = &qconf->port_fwd_jobs[i];
+ printf("\n\nJob %" PRIu32 ": %-20s "
+ "\n%-18s %'14" PRIu64
+ "\n%-18s %'14.0f"
+ STAT_FMT,
+ i, job->name,
+ "Exec count:", jobs_exec_cnt[i],
+ "Exec period: ", cycles_to_ns(jobs_period[i]),
+ "Exec time",
+ cycles_to_ns(jobs_exec[i]), jobs_exec[i] * 100.0 / stats_period,
+ cycles_to_ns(jobs_exec_cnt[i] ? jobs_exec[i] / jobs_exec_cnt[i]
+ : 0),
+ cycles_to_ns(jobs_exec_min[i]),
+ cycles_to_ns(jobs_exec_max[i]));
+ }
+
+ if (qconf->n_rx_port > 0) {
+ job = &qconf->flush_job;
+ printf("\n\nJob %" PRIu32 ": %-20s "
+ "\n%-18s %'14" PRIu64
+ "\n%-18s %'14.0f"
+ STAT_FMT,
+ i, job->name,
+ "Exec count:", flush_exec_cnt,
+ "Exec period: ", cycles_to_ns(flush_period),
+ "Exec time",
+ cycles_to_ns(flush_exec), flush_exec * 100.0 / stats_period,
+ cycles_to_ns(flush_exec_cnt ? flush_exec / flush_exec_cnt : 0),
+ cycles_to_ns(flush_exec_min),
+ cycles_to_ns(flush_exec_max));
+ }
+}
+
+/* Print out statistics on packets dropped */
+static void
+show_stats_cb(__rte_unused void *param)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid, lcore_id;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s"
+ "\nPort statistics ===================================",
+ clr, topLeft);
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64
+ "\n====================================================",
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+
+ RTE_LCORE_FOREACH(lcore_id) {
+ if (lcore_queue_conf[lcore_id].n_rx_port > 0)
+ show_lcore_stats(lcore_id);
+ }
+
+ printf("\n====================================================\n");
+ rte_eal_alarm_set(timer_period * US_PER_S, show_stats_cb, NULL);
+}
+
+static void
+l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ int sent;
+ unsigned dst_port;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ dst_port = l2fwd_dst_ports[portid];
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
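+	/* On little-endian x86 this yields destination MAC 02:00:00:00:00:<dst_port>,
+	 * e.g. dst_port 3 gives 02:00:00:00:00:03. */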
+
+ /* src addr */
+ ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
+
+ buffer = tx_buffer[dst_port];
+ sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+ if (sent)
+ port_statistics[dst_port].tx += sent;
+}
+
+static void
+l2fwd_job_update_cb(struct rte_jobstats *job, int64_t result)
+{
+ int64_t err = job->target - result;
+ int64_t histeresis = job->target / 8;
+
+ if (err < -histeresis) {
+ if (job->min_period + UPDATE_STEP_DOWN < job->period)
+ job->period -= UPDATE_STEP_DOWN;
+ } else if (err > histeresis) {
+ if (job->period + UPDATE_STEP_UP < job->max_period)
+ job->period += UPDATE_STEP_UP;
+ }
+}
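+
+/*
+ * With the values set up in main() below (target = MAX_PKT_BURST = 32, so
+ * the hysteresis band is 32/8 = 4): a poll that returns more than 36 packets
+ * shortens the period by UPDATE_STEP_DOWN cycles, fewer than 28 packets
+ * lengthens it by UPDATE_STEP_UP, and anything in between leaves it as is.
+ */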
+
+static void
+l2fwd_fwd_job(__rte_unused struct rte_timer *timer, void *arg)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+
+ const uint8_t port_idx = (uintptr_t) arg;
+ const unsigned lcore_id = rte_lcore_id();
+ struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
+ struct rte_jobstats *job = &qconf->port_fwd_jobs[port_idx];
+ const uint8_t portid = qconf->rx_port_list[port_idx];
+
+ uint8_t j;
+ uint16_t total_nb_rx;
+
+ rte_jobstats_start(&qconf->jobs_context, job);
+
+	/* Call rx burst 2 times. This allows the rte_jobstats logic to see if
+	 * this function must be called more frequently. */
+
+ total_nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst,
+ MAX_PKT_BURST);
+
+ for (j = 0; j < total_nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ l2fwd_simple_forward(m, portid);
+ }
+
+ if (total_nb_rx == MAX_PKT_BURST) {
+ const uint16_t nb_rx = rte_eth_rx_burst((uint8_t) portid, 0, pkts_burst,
+ MAX_PKT_BURST);
+
+ total_nb_rx += nb_rx;
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ l2fwd_simple_forward(m, portid);
+ }
+ }
+
+ port_statistics[portid].rx += total_nb_rx;
+
+	/* Adjust the period at which this job is run. */
+ if (rte_jobstats_finish(job, total_nb_rx) != 0) {
+ rte_timer_reset(&qconf->rx_timers[port_idx], job->period, PERIODICAL,
+ lcore_id, l2fwd_fwd_job, arg);
+ }
+}
+
+static void
+l2fwd_flush_job(__rte_unused struct rte_timer *timer, __rte_unused void *arg)
+{
+ uint64_t now;
+ unsigned lcore_id;
+ struct lcore_queue_conf *qconf;
+ uint8_t portid;
+ unsigned i;
+ uint32_t sent;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ rte_jobstats_start(&qconf->jobs_context, &qconf->flush_job);
+
+ now = rte_get_timer_cycles();
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
+
+ if (qconf->next_flush_time[portid] <= now)
+ continue;
+
+ buffer = tx_buffer[portid];
+ sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
+ if (sent)
+ port_statistics[portid].tx += sent;
+
+ qconf->next_flush_time[portid] = rte_get_timer_cycles() + drain_tsc;
+ }
+
+	/* Pass target to indicate that this job is happy with the time
+	 * interval in which it was called. */
+ rte_jobstats_finish(&qconf->flush_job, qconf->flush_job.target);
+}
+
+/* main processing loop */
+static void
+l2fwd_main_loop(void)
+{
+ unsigned lcore_id;
+ unsigned i, portid;
+ struct lcore_queue_conf *qconf;
+ uint8_t stats_read_pending = 0;
+ uint8_t need_manage;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ rte_jobstats_init(&qconf->idle_job, "idle", 0, 0, 0, 0);
+
+ for (;;) {
+ rte_spinlock_lock(&qconf->lock);
+
+ do {
+ rte_jobstats_context_start(&qconf->jobs_context);
+
+ /* Do the Idle job:
+ * - Read stats_read_pending flag
+			 * - Check if some real job needs to be executed
+ */
+ rte_jobstats_start(&qconf->jobs_context, &qconf->idle_job);
+
+ uint64_t repeats = 0;
+
+ do {
+ uint8_t i;
+ uint64_t now = rte_get_timer_cycles();
+
+ repeats++;
+ need_manage = qconf->flush_timer.expire < now;
+				/* Check if we were asked to provide stats. */
+ stats_read_pending =
+ rte_atomic16_read(&qconf->stats_read_pending);
+ need_manage |= stats_read_pending;
+
+ for (i = 0; i < qconf->n_rx_port && !need_manage; i++)
+ need_manage = qconf->rx_timers[i].expire < now;
+
+ } while (!need_manage);
+
+ if (likely(repeats != 1))
+ rte_jobstats_finish(&qconf->idle_job, qconf->idle_job.target);
+ else
+ rte_jobstats_abort(&qconf->idle_job);
+
+ rte_timer_manage();
+ rte_jobstats_context_finish(&qconf->jobs_context);
+ } while (likely(stats_read_pending == 0));
+
+ rte_spinlock_unlock(&qconf->lock);
+ rte_pause();
+ }
+}
+
+static int
+l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ l2fwd_main_loop();
+ return 0;
+}
+
+/* display usage */
+static void
+l2fwd_usage(const char *prgname)
+{
+	printf("%s [EAL options] -- -p PORTMASK [-q NQ] [-T PERIOD] [-l]\n"
+	       " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+	       " -q NQ: number of queues (=ports) per lcore (default is 1)\n"
+	       " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n"
+	       " -l: use the system default locale instead of the \"C\" locale for the thousands separator in stats\n",
+	       prgname);
+}
+
+static int
+l2fwd_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static unsigned int
+l2fwd_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+	/* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:T:l",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
+ if (l2fwd_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
+ if (l2fwd_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_parse_timer_period(optarg);
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* For thousands separator in printf. */
+ case 'l':
+ setlocale(LC_ALL, "");
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Check the link status of all ports in up to 9s, and print the status at the end */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+					("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ unsigned lcore_id, rx_lcore_id;
+ unsigned nb_ports_in_mask = 0;
+ int ret;
+ char name[RTE_JOBSTATS_NAMESIZE];
+ uint8_t nb_ports;
+ uint8_t nb_ports_available;
+ uint8_t portid, last_port;
+ uint8_t i;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");
+
+ rte_timer_subsystem_init();
+
+ /* fetch default timer frequency. */
+ hz = rte_get_timer_hz();
+
+ /* create the mbuf pool */
+ l2fwd_pktmbuf_pool =
+ rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (l2fwd_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /* reset l2fwd_dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ l2fwd_dst_ports[portid] = 0;
+ last_port = 0;
+
+ /*
+ * Each logical core is assigned a dedicated TX queue on each port.
+ */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ if (nb_ports_in_mask % 2) {
+ l2fwd_dst_ports[portid] = last_port;
+ l2fwd_dst_ports[last_port] = portid;
+ } else
+ last_port = portid;
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ l2fwd_dst_ports[last_port] = last_port;
+ }
+
+ rx_lcore_id = 0;
+ qconf = NULL;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_rx_queue_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->n_rx_port++;
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
+ }
+
+ nb_ports_available = nb_ports;
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n", (unsigned) portid);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ l2fwd_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Initialize TX buffers */
+ tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
+ rte_eth_tx_buffer_count_callback,
+ &port_statistics[portid].dropped);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ printf("done:\n");
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
+
+ drain_tsc = (hz + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ RTE_LCORE_FOREACH(lcore_id) {
+ qconf = &lcore_queue_conf[lcore_id];
+
+ rte_spinlock_init(&qconf->lock);
+
+ if (rte_jobstats_context_init(&qconf->jobs_context) != 0)
+ rte_panic("Jobs stats context for core %u init failed\n", lcore_id);
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD,
+ "lcore %u: no ports so no jobs stats context initialization\n",
+ lcore_id);
+ continue;
+ }
+ /* Add flush job.
+ * Set fixed period by setting min = max = initial period. Set target to
+ * zero as it is irrelevant for this job. */
+ rte_jobstats_init(&qconf->flush_job, "flush", drain_tsc, drain_tsc,
+ drain_tsc, 0);
+
+ rte_timer_init(&qconf->flush_timer);
+ ret = rte_timer_reset(&qconf->flush_timer, drain_tsc, PERIODICAL,
+ lcore_id, &l2fwd_flush_job, NULL);
+
+ if (ret < 0) {
+ rte_exit(1, "Failed to reset flush job timer for lcore %u: %s",
+ lcore_id, rte_strerror(-ret));
+ }
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ struct rte_jobstats *job = &qconf->port_fwd_jobs[i];
+
+ portid = qconf->rx_port_list[i];
+ printf("Setting forward jon for port %u\n", portid);
+
+ snprintf(name, RTE_DIM(name), "port %u fwd", portid);
+ /* Setup forward job.
+ * Set min, max and initial period. Set target to MAX_PKT_BURST as
+ * this is desired optimal RX/TX burst size. */
+ rte_jobstats_init(job, name, 0, drain_tsc, 0, MAX_PKT_BURST);
+ rte_jobstats_set_update_period_function(job, l2fwd_job_update_cb);
+
+ rte_timer_init(&qconf->rx_timers[i]);
+ ret = rte_timer_reset(&qconf->rx_timers[i], 0, PERIODICAL, lcore_id,
+ &l2fwd_fwd_job, (void *)(uintptr_t)i);
+
+ if (ret < 0) {
+ rte_exit(1, "Failed to reset lcore %u port %u job timer: %s",
+ lcore_id, qconf->rx_port_list[i], rte_strerror(-ret));
+ }
+ }
+ }
+
+ if (timer_period)
+ rte_eal_alarm_set(timer_period * MS_PER_S, show_stats_cb, NULL);
+ else
+ RTE_LOG(INFO, L2FWD, "Stats display disabled\n");
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
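
A rough illustration of the pattern set up above (not part of the imported source): each per-port forward job is a periodic rte_timer callback that brackets its work with the rte_jobstats calls, then reschedules itself when the adaptive period changes. The sketch below assumes a hypothetical do_rx_tx_burst() helper and callback name; the patch's real callback is l2fwd_fwd_job() earlier in this file.

	static void
	example_fwd_job_cb(struct rte_timer *timer, void *arg)
	{
		struct lcore_queue_conf *qconf = &lcore_queue_conf[rte_lcore_id()];
		const unsigned idx = (uintptr_t)arg;              /* RX port index passed at setup */
		struct rte_jobstats *job = &qconf->port_fwd_jobs[idx];
		uint16_t nb_rx;

		rte_jobstats_start(&qconf->jobs_context, job);    /* mark job execution start */
		nb_rx = do_rx_tx_burst(qconf->rx_port_list[idx]); /* hypothetical RX + forward helper */

		/* Report the work done; the update callback then steers the period
		 * toward the MAX_PKT_BURST target configured in rte_jobstats_init(). */
		if (rte_jobstats_finish(job, nb_rx) != 0)
			rte_timer_reset(timer, job->period, PERIODICAL,
					rte_lcore_id(), example_fwd_job_cb, arg);
	}
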
diff --git a/examples/l2fwd-keepalive/Makefile b/examples/l2fwd-keepalive/Makefile
new file mode 100644
index 00000000..568edcb4
--- /dev/null
+++ b/examples/l2fwd-keepalive/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd-keepalive
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd-keepalive/main.c b/examples/l2fwd-keepalive/main.c
new file mode 100644
index 00000000..8da89aa1
--- /dev/null
+++ b/examples/l2fwd-keepalive/main.c
@@ -0,0 +1,782 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_timer.h>
+#include <rte_keepalive.h>
+
+#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
+
+#define NB_MBUF 8192
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t l2fwd_enabled_port_mask;
+
+/* list of enabled ports */
+static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
+
+static unsigned int l2fwd_rx_queue_per_lcore = 1;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+struct rte_mempool *l2fwd_pktmbuf_pool = NULL;
+
+/* Per-port statistics struct */
+struct l2fwd_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
+
+/* Timer periods (in milliseconds) for statistics printout and keepalive checks */
+#define TIMER_MILLISECOND 1
+#define MAX_TIMER_PERIOD 86400 /* 1 day max */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* 10 seconds */
+static int64_t check_period = 5; /* default check cycle is 5ms */
+
+/* Keepalive structure */
+struct rte_keepalive *rte_global_keepalive_info;
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(__attribute__((unused)) struct rte_timer *ptr_timer,
+ __attribute__((unused)) void *ptr_data)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+ printf("\n====================================================\n");
+}
+
+static void
+l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ int sent;
+ unsigned dst_port;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ dst_port = l2fwd_dst_ports[portid];
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
+
+ buffer = tx_buffer[dst_port];
+ sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+ if (sent)
+ port_statistics[dst_port].tx += sent;
+}
+
+/* main processing loop */
+static void
+l2fwd_main_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ int sent;
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
+ / US_PER_S * BURST_TX_DRAIN_US;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ uint64_t tsc_initial = rte_rdtsc();
+ uint64_t tsc_lifetime = (rand()&0x07) * rte_get_tsc_hz();
+
+ while (1) {
+ /* Keepalive heartbeat */
+ rte_keepalive_mark_alive(rte_global_keepalive_info);
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * Die randomly within 7 secs for demo purposes if
+ * keepalive enabled
+ */
+ if (check_period > 0 && cur_tsc - tsc_initial > tsc_lifetime)
+ break;
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
+ buffer = tx_buffer[portid];
+
+ sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
+ if (sent)
+ port_statistics[portid].tx += sent;
+
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ port_statistics[portid].rx += nb_rx;
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ l2fwd_simple_forward(m, portid);
+ }
+ }
+ }
+}
+
+static int
+l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ l2fwd_main_loop();
+ return 0;
+}
+
+/* display usage */
+static void
+l2fwd_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -K PERIOD: Keepalive check period (5 default; 86400 max)\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static int
+l2fwd_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static unsigned int
+l2fwd_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+static int
+l2fwd_parse_check_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:T:K:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
+ if (l2fwd_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
+ if (l2fwd_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_parse_timer_period(optarg)
+ * (int64_t)(1000 * TIMER_MILLISECOND);
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* Check period */
+ case 'K':
+ check_period = l2fwd_parse_check_period(optarg);
+ if (check_period < 0) {
+ printf("invalid check period\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Check the link status of all ports, waiting up to 9 seconds, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+static void
+dead_core(__attribute__((unused)) void *ptr_data, const int id_core)
+{
+ printf("Dead core %i - restarting..\n", id_core);
+ if (rte_eal_get_lcore_state(id_core) == FINISHED) {
+ rte_eal_wait_lcore(id_core);
+ rte_eal_remote_launch(l2fwd_launch_one_lcore, NULL, id_core);
+ } else {
+ printf("..false positive!\n");
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ int ret;
+ uint8_t nb_ports;
+ uint8_t nb_ports_available;
+ uint8_t portid, last_port;
+ unsigned lcore_id, rx_lcore_id;
+ unsigned nb_ports_in_mask = 0;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ l2fwd_enabled_port_mask = 0;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");
+
+ /* create the mbuf pool */
+ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (l2fwd_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /* reset l2fwd_dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ l2fwd_dst_ports[portid] = 0;
+ last_port = 0;
+
+ /*
+ * Each logical core is assigned a dedicated TX queue on each port.
+ */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ if (nb_ports_in_mask % 2) {
+ l2fwd_dst_ports[portid] = last_port;
+ l2fwd_dst_ports[last_port] = portid;
+ } else
+ last_port = portid;
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ l2fwd_dst_ports[last_port] = last_port;
+ }
+
+ rx_lcore_id = 1;
+ qconf = NULL;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_rx_queue_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->n_rx_port++;
+ printf("Lcore %u: RX port %u\n",
+ rx_lcore_id, (unsigned) portid);
+ }
+
+ nb_ports_available = nb_ports;
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n",
+ (unsigned) portid);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_macaddr_get(portid, &l2fwd_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ l2fwd_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Initialize TX buffers */
+ tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
+ rte_eth_tx_buffer_count_callback,
+ &port_statistics[portid].dropped);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_dev_start:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: "
+ "%02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
+
+ struct rte_timer hb_timer, stats_timer;
+
+ rte_timer_subsystem_init();
+ rte_timer_init(&stats_timer);
+
+ if (check_period > 0) {
+ rte_global_keepalive_info =
+ rte_keepalive_create(&dead_core, NULL);
+ if (rte_global_keepalive_info == NULL)
+ rte_exit(EXIT_FAILURE, "init_keep_alive() failed");
+ rte_timer_init(&hb_timer);
+ if (rte_timer_reset(&hb_timer,
+ (check_period * rte_get_timer_hz()) / 1000,
+ PERIODICAL,
+ rte_lcore_id(),
+ (void(*)(struct rte_timer*, void*))
+ &rte_keepalive_dispatch_pings,
+ rte_global_keepalive_info
+ ) != 0 )
+ rte_exit(EXIT_FAILURE, "Keepalive setup failure.\n");
+ }
+ if (timer_period > 0) {
+ if (rte_timer_reset(&stats_timer,
+ (timer_period * rte_get_timer_hz()) / 1000,
+ PERIODICAL,
+ rte_lcore_id(),
+ &print_stats, NULL
+ ) != 0 )
+ rte_exit(EXIT_FAILURE, "Stats setup failure.\n");
+ }
+ /* launch per-lcore init on every slave lcore */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ struct lcore_queue_conf *qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0)
+ RTE_LOG(INFO, L2FWD,
+ "lcore %u has nothing to do\n",
+ lcore_id
+ );
+ else {
+ rte_eal_remote_launch(
+ l2fwd_launch_one_lcore,
+ NULL,
+ lcore_id
+ );
+ rte_keepalive_register_core(rte_global_keepalive_info,
+ lcore_id);
+ }
+ }
+ for (;;) {
+ rte_timer_manage();
+ rte_delay_ms(5);
+ }
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
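
The keepalive wiring above reduces to a small pattern: the master creates the keepalive context, registers every slave core it launches, slaves beat with rte_keepalive_mark_alive() inside their loop, and the master periodically runs rte_keepalive_dispatch_pings() so that cores which stop beating trigger the dead-core callback. A minimal self-contained sketch of that pattern follows (illustration only; worker(), monitor() and on_dead_core() are hypothetical stand-ins, not functions from this patch).

	#include <stdio.h>
	#include <rte_lcore.h>
	#include <rte_launch.h>
	#include <rte_cycles.h>
	#include <rte_keepalive.h>

	static void
	on_dead_core(__attribute__((unused)) void *data, const int id_core)
	{
		printf("core %d missed its heartbeat\n", id_core);
	}

	static int
	worker(void *arg)
	{
		struct rte_keepalive *ka = arg;

		for (;;) {
			rte_keepalive_mark_alive(ka); /* beat at least once per check period */
			/* ... do a bounded amount of forwarding work ... */
		}
		return 0;
	}

	static void
	monitor(unsigned check_period_ms)
	{
		struct rte_keepalive *ka = rte_keepalive_create(on_dead_core, NULL);
		unsigned lcore_id;

		if (ka == NULL)
			return;

		RTE_LCORE_FOREACH_SLAVE(lcore_id) {
			rte_eal_remote_launch(worker, ka, lcore_id);
			rte_keepalive_register_core(ka, lcore_id);
		}
		for (;;) {
			/* same callback the hb_timer drives in the code above */
			rte_keepalive_dispatch_pings(NULL, ka);
			rte_delay_ms(check_period_ms);
		}
	}
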
diff --git a/examples/l2fwd/Makefile b/examples/l2fwd/Makefile
new file mode 100644
index 00000000..78feeeb8
--- /dev/null
+++ b/examples/l2fwd/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l2fwd/main.c b/examples/l2fwd/main.c
new file mode 100644
index 00000000..1ad94887
--- /dev/null
+++ b/examples/l2fwd/main.c
@@ -0,0 +1,720 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+static volatile bool force_quit;
+
+#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
+
+#define NB_MBUF 8192
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t l2fwd_enabled_port_mask = 0;
+
+/* list of enabled ports */
+static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
+
+static unsigned int l2fwd_rx_queue_per_lcore = 1;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+struct rte_mempool * l2fwd_pktmbuf_pool = NULL;
+
+/* Per-port statistics struct */
+struct l2fwd_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+struct l2fwd_port_statistics port_statistics[RTE_MAX_ETHPORTS];
+
+/* A tsc-based timer responsible for triggering statistics printout */
+#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 Ghz */
+#define MAX_TIMER_PERIOD 86400 /* 1 day max */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+ printf("\n====================================================\n");
+}
+
+static void
+l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ unsigned dst_port;
+ int sent;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ dst_port = l2fwd_dst_ports[portid];
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
+
+ buffer = tx_buffer[dst_port];
+ sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+ if (sent)
+ port_statistics[dst_port].tx += sent;
+}
+
+/* main processing loop */
+static void
+l2fwd_main_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ int sent;
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
+ BURST_TX_DRAIN_US;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ prev_tsc = 0;
+ timer_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+
+ }
+
+ while (!force_quit) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
+ buffer = tx_buffer[portid];
+
+ sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
+ if (sent)
+ port_statistics[portid].tx += sent;
+
+ }
+
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
+
+ /* do this only on master core */
+ if (lcore_id == rte_get_master_lcore()) {
+ print_stats();
+ /* reset the timer */
+ timer_tsc = 0;
+ }
+ }
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ port_statistics[portid].rx += nb_rx;
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ l2fwd_simple_forward(m, portid);
+ }
+ }
+ }
+}
+
+static int
+l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ l2fwd_main_loop();
+ return 0;
+}
+
+/* display usage */
+static void
+l2fwd_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static int
+l2fwd_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static unsigned int
+l2fwd_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:T:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
+ if (l2fwd_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
+ if (l2fwd_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND;
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Check the link status of all ports, waiting up to 9 seconds, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ if (force_quit)
+ return;
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if (force_quit)
+ return;
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+static void
+signal_handler(int signum)
+{
+ if (signum == SIGINT || signum == SIGTERM) {
+ printf("\n\nSignal %d received, preparing to exit...\n",
+ signum);
+ force_quit = true;
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ int ret;
+ uint8_t nb_ports;
+ uint8_t nb_ports_available;
+ uint8_t portid, last_port;
+ unsigned lcore_id, rx_lcore_id;
+ unsigned nb_ports_in_mask = 0;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ force_quit = false;
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");
+
+ /* create the mbuf pool */
+ l2fwd_pktmbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (l2fwd_pktmbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /* reset l2fwd_dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ l2fwd_dst_ports[portid] = 0;
+ last_port = 0;
+
+ /*
+ * Each logical core is assigned a dedicated TX queue on each port.
+ */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ if (nb_ports_in_mask % 2) {
+ l2fwd_dst_ports[portid] = last_port;
+ l2fwd_dst_ports[last_port] = portid;
+ }
+ else
+ last_port = portid;
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ l2fwd_dst_ports[last_port] = last_port;
+ }
+
+ rx_lcore_id = 0;
+ qconf = NULL;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_rx_queue_per_lcore) {
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->n_rx_port++;
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
+ }
+
+ nb_ports_available = nb_ports;
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n", (unsigned) portid);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ l2fwd_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Initialize TX buffers */
+ tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
+ rte_eth_tx_buffer_count_callback,
+ &port_statistics[portid].dropped);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ printf("done: \n");
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
+
+ ret = 0;
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l2fwd_launch_one_lcore, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("Closing port %d...", portid);
+ rte_eth_dev_stop(portid);
+ rte_eth_dev_close(portid);
+ printf(" Done\n");
+ }
+ printf("Bye...\n");
+
+ return ret;
+}
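
The port-pairing loop in main() above is easiest to see with a concrete mask. The standalone snippet below (an illustration, not part of the patch; the 0x1b mask is hypothetical) reproduces the same logic: consecutive enabled ports forward to each other and a leftover odd port forwards to itself, so portmask 0x1b (ports 0, 1, 3, 4) yields 0<->1 and 3<->4.

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t mask = 0x1b;            /* hypothetical portmask: ports 0, 1, 3, 4 */
		unsigned dst[32] = {0};
		unsigned port, last = 0, n = 0;

		for (port = 0; port < 32; port++) {
			if ((mask & (1u << port)) == 0)
				continue;          /* skip ports that are not enabled */
			if (n % 2) {               /* second port of a pair: link both ways */
				dst[port] = last;
				dst[last] = port;
			} else                     /* first port of a pair: remember it */
				last = port;
			n++;
		}
		if (n % 2)                         /* odd number of ports: last one loops back */
			dst[last] = last;

		for (port = 0; port < 32; port++)
			if (mask & (1u << port))
				printf("port %u -> port %u\n", port, dst[port]);
		return 0;
	}
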
diff --git a/examples/l3fwd-acl/Makefile b/examples/l3fwd-acl/Makefile
new file mode 100644
index 00000000..a3473a83
--- /dev/null
+++ b/examples/l3fwd-acl/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l3fwd-acl
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l3fwd-acl/main.c b/examples/l3fwd-acl/main.c
new file mode 100644
index 00000000..26d9f5eb
--- /dev/null
+++ b/examples/l3fwd-acl/main.c
@@ -0,0 +1,2079 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+#include <rte_acl.h>
+
+#define DO_RFC_1812_CHECKS
+
+#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
+
+#define MAX_JUMBO_PKT_LEN 9600
+
+#define MEMPOOL_CACHE_SIZE 256
+
+/*
+ * This expression is used to calculate the number of mbufs needed
+ * depending on user input, taking into account memory for rx and tx hardware
+ * rings, cache per lcore and mtable per port per lcore.
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a
+ * minimum value of 8192
+ */
+
+#define NB_MBUF RTE_MAX(\
+ (nb_ports * nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
+ nb_ports * nb_lcores * MAX_PKT_BURST + \
+ nb_ports * n_tx_queue * RTE_TEST_TX_DESC_DEFAULT + \
+ nb_lcores * MEMPOOL_CACHE_SIZE), \
+ (unsigned)8192)
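+
+/*
+ * Worked example (editorial illustration, the input values are hypothetical):
+ * with nb_ports = 2, nb_rx_queue = 1, n_tx_queue = 4 and nb_lcores = 4 this
+ * evaluates to 2*1*128 + 2*4*32 + 2*4*512 + 4*256 = 5632, so RTE_MAX keeps
+ * NB_MBUF at the 8192 floor.
+ */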
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define NB_SOCKETS 8
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+static int promiscuous_on; /**< Ports are not set in promiscuous mode by default. */
+static int numa_on = 1; /**< NUMA is enabled by default. */
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+} __rte_cache_aligned;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_LCORE_PARAMS 1024
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+static struct lcore_params lcore_params_array_default[] = {
+ {0, 0, 2},
+ {0, 1, 2},
+ {0, 2, 2},
+ {1, 0, 2},
+ {1, 1, 2},
+ {1, 2, 2},
+ {2, 0, 2},
+ {3, 0, 3},
+ {3, 1, 3},
+};
+
+static struct lcore_params *lcore_params = lcore_params_array_default;
+static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
+ sizeof(lcore_params_array_default[0]);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP | ETH_RSS_UDP |
+ ETH_RSS_TCP | ETH_RSS_SCTP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
+
+/***********************start of ACL part******************************/
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len);
+#endif
+static inline void
+send_single_packet(struct rte_mbuf *m, uint8_t port);
+
+#define MAX_ACL_RULE_NUM 100000
+#define DEFAULT_MAX_CATEGORIES 1
+#define L3FWD_ACL_IPV4_NAME "l3fwd-acl-ipv4"
+#define L3FWD_ACL_IPV6_NAME "l3fwd-acl-ipv6"
+#define ACL_LEAD_CHAR ('@')
+#define ROUTE_LEAD_CHAR ('R')
+#define COMMENT_LEAD_CHAR ('#')
+#define OPTION_CONFIG "config"
+#define OPTION_NONUMA "no-numa"
+#define OPTION_ENBJMO "enable-jumbo"
+#define OPTION_RULE_IPV4 "rule_ipv4"
+#define OPTION_RULE_IPV6 "rule_ipv6"
+#define OPTION_SCALAR "scalar"
+#define ACL_DENY_SIGNATURE 0xf0000000
+#define RTE_LOGTYPE_L3FWDACL RTE_LOGTYPE_USER3
+#define acl_log(format, ...) RTE_LOG(ERR, L3FWDACL, format, ##__VA_ARGS__)
+#define uint32_t_to_char(ip, a, b, c, d) do {\
+ *a = (unsigned char)(ip >> 24 & 0xff);\
+ *b = (unsigned char)(ip >> 16 & 0xff);\
+ *c = (unsigned char)(ip >> 8 & 0xff);\
+ *d = (unsigned char)(ip & 0xff);\
+ } while (0)
+#define OFF_ETHHEAD (sizeof(struct ether_hdr))
+#define OFF_IPV42PROTO (offsetof(struct ipv4_hdr, next_proto_id))
+#define OFF_IPV62PROTO (offsetof(struct ipv6_hdr, proto))
+#define MBUF_IPV4_2PROTO(m) \
+ rte_pktmbuf_mtod_offset((m), uint8_t *, OFF_ETHHEAD + OFF_IPV42PROTO)
+#define MBUF_IPV6_2PROTO(m) \
+ rte_pktmbuf_mtod_offset((m), uint8_t *, OFF_ETHHEAD + OFF_IPV62PROTO)
+
+#define GET_CB_FIELD(in, fd, base, lim, dlm) do { \
+ unsigned long val; \
+ char *end; \
+ errno = 0; \
+ val = strtoul((in), &end, (base)); \
+ if (errno != 0 || end[0] != (dlm) || val > (lim)) \
+ return -EINVAL; \
+ (fd) = (typeof(fd))val; \
+ (in) = end + 1; \
+} while (0)
+
+/*
+ * ACL rules should have higher priorities than route rules so that an ACL
+ * rule is always found when an input packet matches multiple entries in the
+ * database. The exception is performance measurement, which may define route
+ * rules with higher priority so that a route rule is always returned by each
+ * lookup. The range from ACL_RULE_PRIORITY_MAX + 1 to RTE_ACL_MAX_PRIORITY
+ * is reserved for route entries used in performance measurement.
+ */
+#define ACL_RULE_PRIORITY_MAX 0x10000000
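+
+/*
+ * For illustration (not part of the original code): add_rules() below assigns
+ * each rule the priority RTE_ACL_MAX_PRIORITY - total_num, so entries that
+ * appear earlier in the rule file win when a packet matches more than one
+ * entry.
+ */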
+
+/*
+ * The forwarding port stored in the ACL library starts from 1,
+ * since the ACL library treats 0 as an invalid userdata value.
+ * Therefore add 1 when saving the port and subtract 1 when forwarding a packet.
+ */
+#define FWD_PORT_SHIFT 1
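+
+/*
+ * Illustrative example: a route entry that forwards to port 2 is stored with
+ * userdata 3 (2 + FWD_PORT_SHIFT) in add_rules(), and send_one_packet()
+ * recovers the port as (uint8_t)(res - FWD_PORT_SHIFT).
+ */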
+
+/*
+ * Rule and trace formats definitions.
+ */
+
+enum {
+ PROTO_FIELD_IPV4,
+ SRC_FIELD_IPV4,
+ DST_FIELD_IPV4,
+ SRCP_FIELD_IPV4,
+ DSTP_FIELD_IPV4,
+ NUM_FIELDS_IPV4
+};
+
+/*
+ * This effectively defines the order of the IPV4VLAN classification inputs:
+ * - PROTO
+ * - VLAN (TAG and DOMAIN)
+ * - SRC IP ADDRESS
+ * - DST IP ADDRESS
+ * - PORTS (SRC and DST)
+ */
+enum {
+ RTE_ACL_IPV4VLAN_PROTO,
+ RTE_ACL_IPV4VLAN_VLAN,
+ RTE_ACL_IPV4VLAN_SRC,
+ RTE_ACL_IPV4VLAN_DST,
+ RTE_ACL_IPV4VLAN_PORTS,
+ RTE_ACL_IPV4VLAN_NUM
+};
+
+struct rte_acl_field_def ipv4_defs[NUM_FIELDS_IPV4] = {
+ {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = PROTO_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4VLAN_PROTO,
+ .offset = 0,
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4VLAN_SRC,
+ .offset = offsetof(struct ipv4_hdr, src_addr) -
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4VLAN_DST,
+ .offset = offsetof(struct ipv4_hdr, dst_addr) -
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = SRCP_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4VLAN_PORTS,
+ .offset = sizeof(struct ipv4_hdr) -
+ offsetof(struct ipv4_hdr, next_proto_id),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = DSTP_FIELD_IPV4,
+ .input_index = RTE_ACL_IPV4VLAN_PORTS,
+ .offset = sizeof(struct ipv4_hdr) -
+ offsetof(struct ipv4_hdr, next_proto_id) +
+ sizeof(uint16_t),
+ },
+};
+
+#define IPV6_ADDR_LEN 16
+#define IPV6_ADDR_U16 (IPV6_ADDR_LEN / sizeof(uint16_t))
+#define IPV6_ADDR_U32 (IPV6_ADDR_LEN / sizeof(uint32_t))
+
+enum {
+ PROTO_FIELD_IPV6,
+ SRC1_FIELD_IPV6,
+ SRC2_FIELD_IPV6,
+ SRC3_FIELD_IPV6,
+ SRC4_FIELD_IPV6,
+ DST1_FIELD_IPV6,
+ DST2_FIELD_IPV6,
+ DST3_FIELD_IPV6,
+ DST4_FIELD_IPV6,
+ SRCP_FIELD_IPV6,
+ DSTP_FIELD_IPV6,
+ NUM_FIELDS_IPV6
+};
+
+struct rte_acl_field_def ipv6_defs[NUM_FIELDS_IPV6] = {
+ {
+ .type = RTE_ACL_FIELD_TYPE_BITMASK,
+ .size = sizeof(uint8_t),
+ .field_index = PROTO_FIELD_IPV6,
+ .input_index = PROTO_FIELD_IPV6,
+ .offset = 0,
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC1_FIELD_IPV6,
+ .input_index = SRC1_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, src_addr) -
+ offsetof(struct ipv6_hdr, proto),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC2_FIELD_IPV6,
+ .input_index = SRC2_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, src_addr) -
+ offsetof(struct ipv6_hdr, proto) + sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC3_FIELD_IPV6,
+ .input_index = SRC3_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, src_addr) -
+ offsetof(struct ipv6_hdr, proto) + 2 * sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = SRC4_FIELD_IPV6,
+ .input_index = SRC4_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, src_addr) -
+ offsetof(struct ipv6_hdr, proto) + 3 * sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST1_FIELD_IPV6,
+ .input_index = DST1_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, dst_addr)
+ - offsetof(struct ipv6_hdr, proto),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST2_FIELD_IPV6,
+ .input_index = DST2_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, dst_addr) -
+ offsetof(struct ipv6_hdr, proto) + sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST3_FIELD_IPV6,
+ .input_index = DST3_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, dst_addr) -
+ offsetof(struct ipv6_hdr, proto) + 2 * sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_MASK,
+ .size = sizeof(uint32_t),
+ .field_index = DST4_FIELD_IPV6,
+ .input_index = DST4_FIELD_IPV6,
+ .offset = offsetof(struct ipv6_hdr, dst_addr) -
+ offsetof(struct ipv6_hdr, proto) + 3 * sizeof(uint32_t),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = SRCP_FIELD_IPV6,
+ .input_index = SRCP_FIELD_IPV6,
+ .offset = sizeof(struct ipv6_hdr) -
+ offsetof(struct ipv6_hdr, proto),
+ },
+ {
+ .type = RTE_ACL_FIELD_TYPE_RANGE,
+ .size = sizeof(uint16_t),
+ .field_index = DSTP_FIELD_IPV6,
+ .input_index = SRCP_FIELD_IPV6,
+ .offset = sizeof(struct ipv6_hdr) -
+ offsetof(struct ipv6_hdr, proto) + sizeof(uint16_t),
+ },
+};
+
+enum {
+ CB_FLD_SRC_ADDR,
+ CB_FLD_DST_ADDR,
+ CB_FLD_SRC_PORT_LOW,
+ CB_FLD_SRC_PORT_DLM,
+ CB_FLD_SRC_PORT_HIGH,
+ CB_FLD_DST_PORT_LOW,
+ CB_FLD_DST_PORT_DLM,
+ CB_FLD_DST_PORT_HIGH,
+ CB_FLD_PROTO,
+ CB_FLD_USERDATA,
+ CB_FLD_NUM,
+};
+
+RTE_ACL_RULE_DEF(acl4_rule, RTE_DIM(ipv4_defs));
+RTE_ACL_RULE_DEF(acl6_rule, RTE_DIM(ipv6_defs));
+
+struct acl_search_t {
+ const uint8_t *data_ipv4[MAX_PKT_BURST];
+ struct rte_mbuf *m_ipv4[MAX_PKT_BURST];
+ uint32_t res_ipv4[MAX_PKT_BURST];
+ int num_ipv4;
+
+ const uint8_t *data_ipv6[MAX_PKT_BURST];
+ struct rte_mbuf *m_ipv6[MAX_PKT_BURST];
+ uint32_t res_ipv6[MAX_PKT_BURST];
+ int num_ipv6;
+};
+
+static struct {
+ char mapped[NB_SOCKETS];
+ struct rte_acl_ctx *acx_ipv4[NB_SOCKETS];
+ struct rte_acl_ctx *acx_ipv6[NB_SOCKETS];
+#ifdef L3FWDACL_DEBUG
+ struct acl4_rule *rule_ipv4;
+ struct acl6_rule *rule_ipv6;
+#endif
+} acl_config;
+
+static struct{
+ const char *rule_ipv4_name;
+ const char *rule_ipv6_name;
+ int scalar;
+} parm_config;
+
+const char cb_port_delim[] = ":";
+
+static inline void
+print_one_ipv4_rule(struct acl4_rule *rule, int extra)
+{
+ unsigned char a, b, c, d;
+
+ uint32_t_to_char(rule->field[SRC_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d,
+ rule->field[SRC_FIELD_IPV4].mask_range.u32);
+ uint32_t_to_char(rule->field[DST_FIELD_IPV4].value.u32,
+ &a, &b, &c, &d);
+ printf("%hhu.%hhu.%hhu.%hhu/%u ", a, b, c, d,
+ rule->field[DST_FIELD_IPV4].mask_range.u32);
+ printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ",
+ rule->field[SRCP_FIELD_IPV4].value.u16,
+ rule->field[SRCP_FIELD_IPV4].mask_range.u16,
+ rule->field[DSTP_FIELD_IPV4].value.u16,
+ rule->field[DSTP_FIELD_IPV4].mask_range.u16,
+ rule->field[PROTO_FIELD_IPV4].value.u8,
+ rule->field[PROTO_FIELD_IPV4].mask_range.u8);
+ if (extra)
+ printf("0x%x-0x%x-0x%x ",
+ rule->data.category_mask,
+ rule->data.priority,
+ rule->data.userdata);
+}
+
+static inline void
+print_one_ipv6_rule(struct acl6_rule *rule, int extra)
+{
+ unsigned char a, b, c, d;
+
+ uint32_t_to_char(rule->field[SRC1_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf("%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[SRC2_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[SRC3_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[SRC4_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x/%u ", a, b, c, d,
+ rule->field[SRC1_FIELD_IPV6].mask_range.u32
+ + rule->field[SRC2_FIELD_IPV6].mask_range.u32
+ + rule->field[SRC3_FIELD_IPV6].mask_range.u32
+ + rule->field[SRC4_FIELD_IPV6].mask_range.u32);
+
+ uint32_t_to_char(rule->field[DST1_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf("%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[DST2_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[DST3_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x", a, b, c, d);
+ uint32_t_to_char(rule->field[DST4_FIELD_IPV6].value.u32,
+ &a, &b, &c, &d);
+ printf(":%.2x%.2x:%.2x%.2x/%u ", a, b, c, d,
+ rule->field[DST1_FIELD_IPV6].mask_range.u32
+ + rule->field[DST2_FIELD_IPV6].mask_range.u32
+ + rule->field[DST3_FIELD_IPV6].mask_range.u32
+ + rule->field[DST4_FIELD_IPV6].mask_range.u32);
+
+ printf("%hu : %hu %hu : %hu 0x%hhx/0x%hhx ",
+ rule->field[SRCP_FIELD_IPV6].value.u16,
+ rule->field[SRCP_FIELD_IPV6].mask_range.u16,
+ rule->field[DSTP_FIELD_IPV6].value.u16,
+ rule->field[DSTP_FIELD_IPV6].mask_range.u16,
+ rule->field[PROTO_FIELD_IPV6].value.u8,
+ rule->field[PROTO_FIELD_IPV6].mask_range.u8);
+ if (extra)
+ printf("0x%x-0x%x-0x%x ",
+ rule->data.category_mask,
+ rule->data.priority,
+ rule->data.userdata);
+}
+
+/* Bypass comment and empty lines */
+static inline int
+is_bypass_line(char *buff)
+{
+ int i = 0;
+
+ /* comment line */
+ if (buff[0] == COMMENT_LEAD_CHAR)
+ return 1;
+ /* empty line */
+ while (buff[i] != '\0') {
+ if (!isspace(buff[i]))
+ return 0;
+ i++;
+ }
+ return 1;
+}
+
+#ifdef L3FWDACL_DEBUG
+static inline void
+dump_acl4_rule(struct rte_mbuf *m, uint32_t sig)
+{
+ uint32_t offset = sig & ~ACL_DENY_SIGNATURE;
+ unsigned char a, b, c, d;
+ struct ipv4_hdr *ipv4_hdr = rte_pktmbuf_mtod_offset(m,
+ struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+ uint32_t_to_char(rte_bswap32(ipv4_hdr->src_addr), &a, &b, &c, &d);
+ printf("Packet Src:%hhu.%hhu.%hhu.%hhu ", a, b, c, d);
+ uint32_t_to_char(rte_bswap32(ipv4_hdr->dst_addr), &a, &b, &c, &d);
+ printf("Dst:%hhu.%hhu.%hhu.%hhu ", a, b, c, d);
+
+ printf("Src port:%hu,Dst port:%hu ",
+ rte_bswap16(*(uint16_t *)(ipv4_hdr + 1)),
+ rte_bswap16(*((uint16_t *)(ipv4_hdr + 1) + 1)));
+ printf("hit ACL %d - ", offset);
+
+ print_one_ipv4_rule(acl_config.rule_ipv4 + offset, 1);
+
+ printf("\n\n");
+}
+
+static inline void
+dump_acl6_rule(struct rte_mbuf *m, uint32_t sig)
+{
+ unsigned i;
+ uint32_t offset = sig & ~ACL_DENY_SIGNATURE;
+ struct ipv6_hdr *ipv6_hdr = rte_pktmbuf_mtod_offset(m,
+ struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ printf("Packet Src");
+ for (i = 0; i < RTE_DIM(ipv6_hdr->src_addr); i += sizeof(uint16_t))
+ printf(":%.2x%.2x",
+ ipv6_hdr->src_addr[i], ipv6_hdr->src_addr[i + 1]);
+
+ printf("\nDst");
+ for (i = 0; i < RTE_DIM(ipv6_hdr->dst_addr); i += sizeof(uint16_t))
+ printf(":%.2x%.2x",
+ ipv6_hdr->dst_addr[i], ipv6_hdr->dst_addr[i + 1]);
+
+ printf("\nSrc port:%hu,Dst port:%hu ",
+ rte_bswap16(*(uint16_t *)(ipv6_hdr + 1)),
+ rte_bswap16(*((uint16_t *)(ipv6_hdr + 1) + 1)));
+ printf("hit ACL %d - ", offset);
+
+ print_one_ipv6_rule(acl_config.rule_ipv6 + offset, 1);
+
+ printf("\n\n");
+}
+#endif /* L3FWDACL_DEBUG */
+
+static inline void
+dump_ipv4_rules(struct acl4_rule *rule, int num, int extra)
+{
+ int i;
+
+ for (i = 0; i < num; i++, rule++) {
+ printf("\t%d:", i + 1);
+ print_one_ipv4_rule(rule, extra);
+ printf("\n");
+ }
+}
+
+static inline void
+dump_ipv6_rules(struct acl6_rule *rule, int num, int extra)
+{
+ int i;
+
+ for (i = 0; i < num; i++, rule++) {
+ printf("\t%d:", i + 1);
+ print_one_ipv6_rule(rule, extra);
+ printf("\n");
+ }
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline void
+prepare_one_packet(struct rte_mbuf **pkts_in, struct acl_search_t *acl,
+ int index)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct rte_mbuf *pkt = pkts_in[index];
+
+ if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, pkt->pkt_len) >= 0) {
+
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+
+ /* Fill acl structure */
+ acl->data_ipv4[acl->num_ipv4] = MBUF_IPV4_2PROTO(pkt);
+ acl->m_ipv4[(acl->num_ipv4)++] = pkt;
+
+ } else {
+ /* Not a valid IPv4 packet */
+ rte_pktmbuf_free(pkt);
+ }
+ } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+ /* Fill acl structure */
+ acl->data_ipv6[acl->num_ipv6] = MBUF_IPV6_2PROTO(pkt);
+ acl->m_ipv6[(acl->num_ipv6)++] = pkt;
+
+ } else {
+ /* Unknown type, drop the packet */
+ rte_pktmbuf_free(pkt);
+ }
+}
+
+#else
+static inline void
+prepare_one_packet(struct rte_mbuf **pkts_in, struct acl_search_t *acl,
+ int index)
+{
+ struct rte_mbuf *pkt = pkts_in[index];
+
+ if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ /* Fill acl structure */
+ acl->data_ipv4[acl->num_ipv4] = MBUF_IPV4_2PROTO(pkt);
+ acl->m_ipv4[(acl->num_ipv4)++] = pkt;
+
+ } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+ /* Fill acl structure */
+ acl->data_ipv6[acl->num_ipv6] = MBUF_IPV6_2PROTO(pkt);
+ acl->m_ipv6[(acl->num_ipv6)++] = pkt;
+ } else {
+ /* Unknown type, drop the packet */
+ rte_pktmbuf_free(pkt);
+ }
+}
+#endif /* DO_RFC_1812_CHECKS */
+
+static inline void
+prepare_acl_parameter(struct rte_mbuf **pkts_in, struct acl_search_t *acl,
+ int nb_rx)
+{
+ int i;
+
+ acl->num_ipv4 = 0;
+ acl->num_ipv6 = 0;
+
+ /* Prefetch first packets */
+ for (i = 0; i < PREFETCH_OFFSET && i < nb_rx; i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_in[i], void *));
+ }
+
+ for (i = 0; i < (nb_rx - PREFETCH_OFFSET); i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_in[
+ i + PREFETCH_OFFSET], void *));
+ prepare_one_packet(pkts_in, acl, i);
+ }
+
+ /* Process left packets */
+ for (; i < nb_rx; i++)
+ prepare_one_packet(pkts_in, acl, i);
+}
+
+static inline void
+send_one_packet(struct rte_mbuf *m, uint32_t res)
+{
+ if (likely((res & ACL_DENY_SIGNATURE) == 0 && res != 0)) {
+ /* forward packets */
+ send_single_packet(m,
+ (uint8_t)(res - FWD_PORT_SHIFT));
+ } else{
+ /* in the ACL list, drop it */
+#ifdef L3FWDACL_DEBUG
+ if ((res & ACL_DENY_SIGNATURE) != 0) {
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type))
+ dump_acl4_rule(m, res);
+ else if (RTE_ETH_IS_IPV6_HDR(m->packet_type))
+ dump_acl6_rule(m, res);
+ }
+#endif
+ rte_pktmbuf_free(m);
+ }
+}
+
+
+
+static inline void
+send_packets(struct rte_mbuf **m, uint32_t *res, int num)
+{
+ int i;
+
+ /* Prefetch first packets */
+ for (i = 0; i < PREFETCH_OFFSET && i < num; i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ m[i], void *));
+ }
+
+ for (i = 0; i < (num - PREFETCH_OFFSET); i++) {
+ rte_prefetch0(rte_pktmbuf_mtod(m[
+ i + PREFETCH_OFFSET], void *));
+ send_one_packet(m[i], res[i]);
+ }
+
+ /* Process left packets */
+ for (; i < num; i++)
+ send_one_packet(m[i], res[i]);
+}
+
+/*
+ * Parses an IPv6 address, expecting the following format:
+ * XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX:XXXX (where X is a hexadecimal digit).
+ */
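+/*
+ * For example (illustrative only), "fe80:0:0:0:21b:21ff:fe91:3805" is a
+ * valid address here; the compressed "::" form is not expanded, so all
+ * eight 16-bit groups must be written out.
+ */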
+static int
+parse_ipv6_addr(const char *in, const char **end, uint32_t v[IPV6_ADDR_U32],
+ char dlm)
+{
+ uint32_t addr[IPV6_ADDR_U16];
+
+ GET_CB_FIELD(in, addr[0], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[1], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[2], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[3], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[4], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[5], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[6], 16, UINT16_MAX, ':');
+ GET_CB_FIELD(in, addr[7], 16, UINT16_MAX, dlm);
+
+ *end = in;
+
+ v[0] = (addr[0] << 16) + addr[1];
+ v[1] = (addr[2] << 16) + addr[3];
+ v[2] = (addr[4] << 16) + addr[5];
+ v[3] = (addr[6] << 16) + addr[7];
+
+ return 0;
+}
+
+static int
+parse_ipv6_net(const char *in, struct rte_acl_field field[4])
+{
+ int32_t rc;
+ const char *mp;
+ uint32_t i, m, v[4];
+ const uint32_t nbu32 = sizeof(uint32_t) * CHAR_BIT;
+
+ /* get address. */
+ rc = parse_ipv6_addr(in, &mp, v, '/');
+ if (rc != 0)
+ return rc;
+
+ /* get mask. */
+ GET_CB_FIELD(mp, m, 0, CHAR_BIT * sizeof(v), 0);
+
+ /* put all together. */
+ for (i = 0; i != RTE_DIM(v); i++) {
+ if (m >= (i + 1) * nbu32)
+ field[i].mask_range.u32 = nbu32;
+ else
+ field[i].mask_range.u32 = m > (i * nbu32) ?
+ m - (i * nbu32) : 0;
+
+ field[i].value.u32 = v[i];
+ }
+
+ return 0;
+}
+
+static int
+parse_cb_ipv6_rule(char *str, struct rte_acl_rule *v, int has_userdata)
+{
+ int i, rc;
+ char *s, *sp, *in[CB_FLD_NUM];
+ static const char *dlm = " \t\n";
+ int dim = has_userdata ? CB_FLD_NUM : CB_FLD_USERDATA;
+ s = str;
+
+ for (i = 0; i != dim; i++, s = NULL) {
+ in[i] = strtok_r(s, dlm, &sp);
+ if (in[i] == NULL)
+ return -EINVAL;
+ }
+
+ rc = parse_ipv6_net(in[CB_FLD_SRC_ADDR], v->field + SRC1_FIELD_IPV6);
+ if (rc != 0) {
+ acl_log("failed to read source address/mask: %s\n",
+ in[CB_FLD_SRC_ADDR]);
+ return rc;
+ }
+
+ rc = parse_ipv6_net(in[CB_FLD_DST_ADDR], v->field + DST1_FIELD_IPV6);
+ if (rc != 0) {
+ acl_log("failed to read destination address/mask: %s\n",
+ in[CB_FLD_DST_ADDR]);
+ return rc;
+ }
+
+ /* source port. */
+ GET_CB_FIELD(in[CB_FLD_SRC_PORT_LOW],
+ v->field[SRCP_FIELD_IPV6].value.u16,
+ 0, UINT16_MAX, 0);
+ GET_CB_FIELD(in[CB_FLD_SRC_PORT_HIGH],
+ v->field[SRCP_FIELD_IPV6].mask_range.u16,
+ 0, UINT16_MAX, 0);
+
+ if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim,
+ sizeof(cb_port_delim)) != 0)
+ return -EINVAL;
+
+ /* destination port. */
+ GET_CB_FIELD(in[CB_FLD_DST_PORT_LOW],
+ v->field[DSTP_FIELD_IPV6].value.u16,
+ 0, UINT16_MAX, 0);
+ GET_CB_FIELD(in[CB_FLD_DST_PORT_HIGH],
+ v->field[DSTP_FIELD_IPV6].mask_range.u16,
+ 0, UINT16_MAX, 0);
+
+ if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim,
+ sizeof(cb_port_delim)) != 0)
+ return -EINVAL;
+
+ if (v->field[SRCP_FIELD_IPV6].mask_range.u16
+ < v->field[SRCP_FIELD_IPV6].value.u16
+ || v->field[DSTP_FIELD_IPV6].mask_range.u16
+ < v->field[DSTP_FIELD_IPV6].value.u16)
+ return -EINVAL;
+
+ GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV6].value.u8,
+ 0, UINT8_MAX, '/');
+ GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV6].mask_range.u8,
+ 0, UINT8_MAX, 0);
+
+ if (has_userdata)
+ GET_CB_FIELD(in[CB_FLD_USERDATA], v->data.userdata,
+ 0, UINT32_MAX, 0);
+
+ return 0;
+}
+
+/*
+ * Parse ClassBench rules file.
+ * Expected format:
+ * '@'<src_ipv4_addr>'/'<masklen> <space> \
+ * <dst_ipv4_addr>'/'<masklen> <space> \
+ * <src_port_low> <space> ":" <src_port_high> <space> \
+ * <dst_port_low> <space> ":" <dst_port_high> <space> \
+ * <proto>'/'<mask>
+ */
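+/*
+ * Illustrative rule file lines (assumed, following the format above):
+ *   @192.168.0.0/24 10.0.0.0/8 0 : 65535 0 : 65535 6/0xff
+ *   R0.0.0.0/0 0.0.0.0/0 0 : 65535 0 : 65535 0x0/0x0 1
+ * The '@' line is an ACL (deny) entry; the 'R' line is a route entry whose
+ * trailing number is the forwarding port, which must be set in the enabled
+ * port mask.
+ */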
+static int
+parse_ipv4_net(const char *in, uint32_t *addr, uint32_t *mask_len)
+{
+ uint8_t a, b, c, d, m;
+
+ GET_CB_FIELD(in, a, 0, UINT8_MAX, '.');
+ GET_CB_FIELD(in, b, 0, UINT8_MAX, '.');
+ GET_CB_FIELD(in, c, 0, UINT8_MAX, '.');
+ GET_CB_FIELD(in, d, 0, UINT8_MAX, '/');
+ GET_CB_FIELD(in, m, 0, sizeof(uint32_t) * CHAR_BIT, 0);
+
+ addr[0] = IPv4(a, b, c, d);
+ mask_len[0] = m;
+
+ return 0;
+}
+
+static int
+parse_cb_ipv4vlan_rule(char *str, struct rte_acl_rule *v, int has_userdata)
+{
+ int i, rc;
+ char *s, *sp, *in[CB_FLD_NUM];
+ static const char *dlm = " \t\n";
+ int dim = has_userdata ? CB_FLD_NUM : CB_FLD_USERDATA;
+ s = str;
+
+ for (i = 0; i != dim; i++, s = NULL) {
+ in[i] = strtok_r(s, dlm, &sp);
+ if (in[i] == NULL)
+ return -EINVAL;
+ }
+
+ rc = parse_ipv4_net(in[CB_FLD_SRC_ADDR],
+ &v->field[SRC_FIELD_IPV4].value.u32,
+ &v->field[SRC_FIELD_IPV4].mask_range.u32);
+ if (rc != 0) {
+ acl_log("failed to read source address/mask: %s\n",
+ in[CB_FLD_SRC_ADDR]);
+ return rc;
+ }
+
+ rc = parse_ipv4_net(in[CB_FLD_DST_ADDR],
+ &v->field[DST_FIELD_IPV4].value.u32,
+ &v->field[DST_FIELD_IPV4].mask_range.u32);
+ if (rc != 0) {
+ acl_log("failed to read destination address/mask: %s\n",
+ in[CB_FLD_DST_ADDR]);
+ return rc;
+ }
+
+ GET_CB_FIELD(in[CB_FLD_SRC_PORT_LOW],
+ v->field[SRCP_FIELD_IPV4].value.u16,
+ 0, UINT16_MAX, 0);
+ GET_CB_FIELD(in[CB_FLD_SRC_PORT_HIGH],
+ v->field[SRCP_FIELD_IPV4].mask_range.u16,
+ 0, UINT16_MAX, 0);
+
+ if (strncmp(in[CB_FLD_SRC_PORT_DLM], cb_port_delim,
+ sizeof(cb_port_delim)) != 0)
+ return -EINVAL;
+
+ GET_CB_FIELD(in[CB_FLD_DST_PORT_LOW],
+ v->field[DSTP_FIELD_IPV4].value.u16,
+ 0, UINT16_MAX, 0);
+ GET_CB_FIELD(in[CB_FLD_DST_PORT_HIGH],
+ v->field[DSTP_FIELD_IPV4].mask_range.u16,
+ 0, UINT16_MAX, 0);
+
+ if (strncmp(in[CB_FLD_DST_PORT_DLM], cb_port_delim,
+ sizeof(cb_port_delim)) != 0)
+ return -EINVAL;
+
+ if (v->field[SRCP_FIELD_IPV4].mask_range.u16
+ < v->field[SRCP_FIELD_IPV4].value.u16
+ || v->field[DSTP_FIELD_IPV4].mask_range.u16
+ < v->field[DSTP_FIELD_IPV4].value.u16)
+ return -EINVAL;
+
+ GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV4].value.u8,
+ 0, UINT8_MAX, '/');
+ GET_CB_FIELD(in[CB_FLD_PROTO], v->field[PROTO_FIELD_IPV4].mask_range.u8,
+ 0, UINT8_MAX, 0);
+
+ if (has_userdata)
+ GET_CB_FIELD(in[CB_FLD_USERDATA], v->data.userdata, 0,
+ UINT32_MAX, 0);
+
+ return 0;
+}
+
+static int
+add_rules(const char *rule_path,
+ struct rte_acl_rule **proute_base,
+ unsigned int *proute_num,
+ struct rte_acl_rule **pacl_base,
+ unsigned int *pacl_num, uint32_t rule_size,
+ int (*parser)(char *, struct rte_acl_rule*, int))
+{
+ uint8_t *acl_rules, *route_rules;
+ struct rte_acl_rule *next;
+ unsigned int acl_num = 0, route_num = 0, total_num = 0;
+ unsigned int acl_cnt = 0, route_cnt = 0;
+ char buff[LINE_MAX];
+ FILE *fh = fopen(rule_path, "rb");
+ unsigned int i = 0;
+
+ if (fh == NULL)
+ rte_exit(EXIT_FAILURE, "%s: Open %s failed\n", __func__,
+ rule_path);
+
+ while ((fgets(buff, LINE_MAX, fh) != NULL)) {
+ if (buff[0] == ROUTE_LEAD_CHAR)
+ route_num++;
+ else if (buff[0] == ACL_LEAD_CHAR)
+ acl_num++;
+ }
+
+ if (0 == route_num)
+ rte_exit(EXIT_FAILURE, "Not find any route entries in %s!\n",
+ rule_path);
+
+ fseek(fh, 0, SEEK_SET);
+
+ acl_rules = calloc(acl_num, rule_size);
+
+ if (NULL == acl_rules)
+ rte_exit(EXIT_FAILURE, "%s: failed to malloc memory\n",
+ __func__);
+
+ route_rules = calloc(route_num, rule_size);
+
+ if (NULL == route_rules)
+ rte_exit(EXIT_FAILURE, "%s: failed to malloc memory\n",
+ __func__);
+
+ i = 0;
+ while (fgets(buff, LINE_MAX, fh) != NULL) {
+ i++;
+
+ if (is_bypass_line(buff))
+ continue;
+
+ char s = buff[0];
+
+ /* Route entry */
+ if (s == ROUTE_LEAD_CHAR)
+ next = (struct rte_acl_rule *)(route_rules +
+ route_cnt * rule_size);
+
+ /* ACL entry */
+ else if (s == ACL_LEAD_CHAR)
+ next = (struct rte_acl_rule *)(acl_rules +
+ acl_cnt * rule_size);
+
+ /* Illegal line */
+ else
+ rte_exit(EXIT_FAILURE,
+ "%s Line %u: should start with leading "
+ "char %c or %c\n",
+ rule_path, i, ROUTE_LEAD_CHAR, ACL_LEAD_CHAR);
+
+ if (parser(buff + 1, next, s == ROUTE_LEAD_CHAR) != 0)
+ rte_exit(EXIT_FAILURE,
+ "%s Line %u: parse rules error\n",
+ rule_path, i);
+
+ if (s == ROUTE_LEAD_CHAR) {
+ /* Check the forwarding port number */
+ if ((enabled_port_mask & (1 << next->data.userdata)) ==
+ 0)
+ rte_exit(EXIT_FAILURE,
+ "%s Line %u: fwd number illegal:%u\n",
+ rule_path, i, next->data.userdata);
+ next->data.userdata += FWD_PORT_SHIFT;
+ route_cnt++;
+ } else {
+ next->data.userdata = ACL_DENY_SIGNATURE + acl_cnt;
+ acl_cnt++;
+ }
+
+ next->data.priority = RTE_ACL_MAX_PRIORITY - total_num;
+ next->data.category_mask = -1;
+ total_num++;
+ }
+
+ fclose(fh);
+
+ *pacl_base = (struct rte_acl_rule *)acl_rules;
+ *pacl_num = acl_num;
+ *proute_base = (struct rte_acl_rule *)route_rules;
+ *proute_num = route_cnt;
+
+ return 0;
+}
+
+static void
+dump_acl_config(void)
+{
+ printf("ACL option are:\n");
+ printf(OPTION_RULE_IPV4": %s\n", parm_config.rule_ipv4_name);
+ printf(OPTION_RULE_IPV6": %s\n", parm_config.rule_ipv6_name);
+ printf(OPTION_SCALAR": %d\n", parm_config.scalar);
+}
+
+static int
+check_acl_config(void)
+{
+ if (parm_config.rule_ipv4_name == NULL) {
+ acl_log("ACL IPv4 rule file not specified\n");
+ return -1;
+ } else if (parm_config.rule_ipv6_name == NULL) {
+ acl_log("ACL IPv6 rule file not specified\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct rte_acl_ctx*
+setup_acl(struct rte_acl_rule *route_base,
+ struct rte_acl_rule *acl_base, unsigned int route_num,
+ unsigned int acl_num, int ipv6, int socketid)
+{
+ char name[PATH_MAX];
+ struct rte_acl_param acl_param;
+ struct rte_acl_config acl_build_param;
+ struct rte_acl_ctx *context;
+ int dim = ipv6 ? RTE_DIM(ipv6_defs) : RTE_DIM(ipv4_defs);
+
+ /* Create ACL contexts */
+ snprintf(name, sizeof(name), "%s%d",
+ ipv6 ? L3FWD_ACL_IPV6_NAME : L3FWD_ACL_IPV4_NAME,
+ socketid);
+
+ acl_param.name = name;
+ acl_param.socket_id = socketid;
+ acl_param.rule_size = RTE_ACL_RULE_SZ(dim);
+ acl_param.max_rule_num = MAX_ACL_RULE_NUM;
+
+ if ((context = rte_acl_create(&acl_param)) == NULL)
+ rte_exit(EXIT_FAILURE, "Failed to create ACL context\n");
+
+ if (parm_config.scalar && rte_acl_set_ctx_classify(context,
+ RTE_ACL_CLASSIFY_SCALAR) != 0)
+ rte_exit(EXIT_FAILURE,
+ "Failed to setup classify method for ACL context\n");
+
+ if (rte_acl_add_rules(context, route_base, route_num) < 0)
+ rte_exit(EXIT_FAILURE, "add rules failed\n");
+
+ if (rte_acl_add_rules(context, acl_base, acl_num) < 0)
+ rte_exit(EXIT_FAILURE, "add rules failed\n");
+
+ /* Perform builds */
+ memset(&acl_build_param, 0, sizeof(acl_build_param));
+
+ acl_build_param.num_categories = DEFAULT_MAX_CATEGORIES;
+ acl_build_param.num_fields = dim;
+ memcpy(&acl_build_param.defs, ipv6 ? ipv6_defs : ipv4_defs,
+ ipv6 ? sizeof(ipv6_defs) : sizeof(ipv4_defs));
+
+ if (rte_acl_build(context, &acl_build_param) != 0)
+ rte_exit(EXIT_FAILURE, "Failed to build ACL trie\n");
+
+ rte_acl_dump(context);
+
+ return context;
+}
+
+static int
+app_acl_init(void)
+{
+ unsigned lcore_id;
+ unsigned int i;
+ int socketid;
+ struct rte_acl_rule *acl_base_ipv4, *route_base_ipv4,
+ *acl_base_ipv6, *route_base_ipv6;
+ unsigned int acl_num_ipv4 = 0, route_num_ipv4 = 0,
+ acl_num_ipv6 = 0, route_num_ipv6 = 0;
+
+ if (check_acl_config() != 0)
+ rte_exit(EXIT_FAILURE, "Failed to get valid ACL options\n");
+
+ dump_acl_config();
+
+ /* Load rules from the input file */
+ if (add_rules(parm_config.rule_ipv4_name, &route_base_ipv4,
+ &route_num_ipv4, &acl_base_ipv4, &acl_num_ipv4,
+ sizeof(struct acl4_rule), &parse_cb_ipv4vlan_rule) < 0)
+ rte_exit(EXIT_FAILURE, "Failed to add rules\n");
+
+ acl_log("IPv4 Route entries %u:\n", route_num_ipv4);
+ dump_ipv4_rules((struct acl4_rule *)route_base_ipv4, route_num_ipv4, 1);
+
+ acl_log("IPv4 ACL entries %u:\n", acl_num_ipv4);
+ dump_ipv4_rules((struct acl4_rule *)acl_base_ipv4, acl_num_ipv4, 1);
+
+ if (add_rules(parm_config.rule_ipv6_name, &route_base_ipv6,
+ &route_num_ipv6,
+ &acl_base_ipv6, &acl_num_ipv6,
+ sizeof(struct acl6_rule), &parse_cb_ipv6_rule) < 0)
+ rte_exit(EXIT_FAILURE, "Failed to add rules\n");
+
+ acl_log("IPv6 Route entries %u:\n", route_num_ipv6);
+ dump_ipv6_rules((struct acl6_rule *)route_base_ipv6, route_num_ipv6, 1);
+
+ acl_log("IPv6 ACL entries %u:\n", acl_num_ipv6);
+ dump_ipv6_rules((struct acl6_rule *)acl_base_ipv6, acl_num_ipv6, 1);
+
+ memset(&acl_config, 0, sizeof(acl_config));
+
+ /* Determine the sockets on which an ACL context should be created */
+ if (!numa_on)
+ acl_config.mapped[0] = 1;
+ else {
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ if (socketid >= NB_SOCKETS) {
+ acl_log("Socket %d of lcore %u is out "
+ "of range %d\n",
+ socketid, lcore_id, NB_SOCKETS);
+ free(route_base_ipv4);
+ free(route_base_ipv6);
+ free(acl_base_ipv4);
+ free(acl_base_ipv6);
+ return -1;
+ }
+
+ acl_config.mapped[socketid] = 1;
+ }
+ }
+
+ for (i = 0; i < NB_SOCKETS; i++) {
+ if (acl_config.mapped[i]) {
+ acl_config.acx_ipv4[i] = setup_acl(route_base_ipv4,
+ acl_base_ipv4, route_num_ipv4, acl_num_ipv4,
+ 0, i);
+
+ acl_config.acx_ipv6[i] = setup_acl(route_base_ipv6,
+ acl_base_ipv6, route_num_ipv6, acl_num_ipv6,
+ 1, i);
+ }
+ }
+
+ free(route_base_ipv4);
+ free(route_base_ipv6);
+
+#ifdef L3FWDACL_DEBUG
+ acl_config.rule_ipv4 = (struct acl4_rule *)acl_base_ipv4;
+ acl_config.rule_ipv6 = (struct acl6_rule *)acl_base_ipv6;
+#else
+ free(acl_base_ipv4);
+ free(acl_base_ipv6);
+#endif
+
+ return 0;
+}
+
+/***********************end of ACL part******************************/
+
+struct lcore_conf {
+ uint16_t n_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t n_tx_port;
+ uint16_t tx_port_id[RTE_MAX_ETHPORTS];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline void
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint32_t lcore_id;
+ struct lcore_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_conf[lcore_id];
+ rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
+ qconf->tx_buffer[port], m);
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif
+
+/* main processing loop */
+static int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int i, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
+ int socketid;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
+ / US_PER_S * BURST_TX_DRAIN_US;
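+ /*
+ * Example (illustrative): with a 2 GHz TSC, drain_tsc is about
+ * 2000 cycles per microsecond * 100 us = 200000 cycles between
+ * forced TX buffer flushes.
+ */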
+
+ prev_tsc = 0;
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+ socketid = rte_lcore_to_socket_id(lcore_id);
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD,
+ " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ lcore_id, portid, queueid);
+ }
+
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+ for (i = 0; i < qconf->n_tx_port; ++i) {
+ portid = qconf->tx_port_id[i];
+ rte_eth_tx_buffer_flush(portid,
+ qconf->tx_queue_id[portid],
+ qconf->tx_buffer[portid]);
+ }
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ nb_rx = rte_eth_rx_burst(portid, queueid,
+ pkts_burst, MAX_PKT_BURST);
+
+ if (nb_rx > 0) {
+ struct acl_search_t acl_search;
+
+ prepare_acl_parameter(pkts_burst, &acl_search,
+ nb_rx);
+
+ if (acl_search.num_ipv4) {
+ rte_acl_classify(
+ acl_config.acx_ipv4[socketid],
+ acl_search.data_ipv4,
+ acl_search.res_ipv4,
+ acl_search.num_ipv4,
+ DEFAULT_MAX_CATEGORIES);
+
+ send_packets(acl_search.m_ipv4,
+ acl_search.res_ipv4,
+ acl_search.num_ipv4);
+ }
+
+ if (acl_search.num_ipv6) {
+ rte_acl_classify(
+ acl_config.acx_ipv6[socketid],
+ acl_search.data_ipv6,
+ acl_search.res_ipv6,
+ acl_search.num_ipv6,
+ DEFAULT_MAX_CATEGORIES);
+
+ send_packets(acl_search.m_ipv6,
+ acl_search.res_ipv6,
+ acl_search.num_ipv6);
+ }
+ }
+ }
+ }
+}
+
+static int
+check_lcore_params(void)
+{
+ uint8_t queue, lcore;
+ uint16_t i;
+ int socketid;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ queue = lcore_params[i].queue_id;
+ if (queue >= MAX_RX_QUEUE_PER_PORT) {
+ printf("invalid queue number: %hhu\n", queue);
+ return -1;
+ }
+ lcore = lcore_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in "
+ "lcore mask\n", lcore);
+ return -1;
+ }
+ socketid = rte_lcore_to_socket_id(lcore);
+ if (socketid != 0 && numa_on == 0) {
+ printf("warning: lcore %hhu is on socket %d "
+ "with numa off\n",
+ lcore, socketid);
+ }
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ portid = lcore_params[i].port_id;
+
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port &&
+ lcore_params[i].queue_id > queue)
+ queue = lcore_params[i].queue_id;
+ }
+ return (uint8_t)(++queue);
+}
+
+static int
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)lcore);
+ return -1;
+ } else {
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].n_rx_queue++;
+ }
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK -P"
+ "--"OPTION_RULE_IPV4"=FILE"
+ "--"OPTION_RULE_IPV6"=FILE"
+ " [--"OPTION_CONFIG" (port,queue,lcore)[,(port,queue,lcore]]"
+ " [--"OPTION_ENBJMO" [--max-pkt-len PKTLEN]]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -P : enable promiscuous mode\n"
+ " --"OPTION_CONFIG": (port,queue,lcore): "
+ "rx queues configuration\n"
+ " --"OPTION_NONUMA": optional, disable numa awareness\n"
+ " --"OPTION_ENBJMO": enable jumbo frame"
+ " which max packet len is PKTLEN in decimal (64-9600)\n"
+ " --"OPTION_RULE_IPV4"=FILE: specify the ipv4 rules entries "
+ "file. "
+ "Each rule occupy one line. "
+ "2 kinds of rules are supported. "
+ "One is ACL entry at while line leads with character '%c', "
+ "another is route entry at while line leads with "
+ "character '%c'.\n"
+ " --"OPTION_RULE_IPV6"=FILE: specify the ipv6 rules "
+ "entries file.\n"
+ " --"OPTION_SCALAR": Use scalar function to do lookup\n",
+ prgname, ACL_LEAD_CHAR, ROUTE_LEAD_CHAR);
+}
+
+static int
+parse_max_pkt_len(const char *pktlen)
+{
+ char *end = NULL;
+ unsigned long len;
+
+ /* parse decimal string */
+ len = strtoul(pktlen, &end, 10);
+ if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ return len;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p0, '(')) != NULL) {
+ ++p;
+ if ((p0 = strchr(p, ')')) == NULL)
+ return -1;
+
+ size = p0 - p;
+ if (size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
+ _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id =
+ (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id =
+ (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {OPTION_CONFIG, 1, 0, 0},
+ {OPTION_NONUMA, 0, 0, 0},
+ {OPTION_ENBJMO, 0, 0, 0},
+ {OPTION_RULE_IPV4, 1, 0, 0},
+ {OPTION_RULE_IPV6, 1, 0, 0},
+ {OPTION_SCALAR, 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:P",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'P':
+ printf("Promiscuous mode selected\n");
+ promiscuous_on = 1;
+ break;
+
+ /* long options */
+ case 0:
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_CONFIG,
+ sizeof(OPTION_CONFIG))) {
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("invalid config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_NONUMA,
+ sizeof(OPTION_NONUMA))) {
+ printf("numa is disabled\n");
+ numa_on = 0;
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_ENBJMO, sizeof(OPTION_ENBJMO))) {
+ struct option lenopts = {
+ "max-pkt-len",
+ required_argument,
+ 0,
+ 0
+ };
+
+ printf("jumbo frame is enabled\n");
+ port_conf.rxmode.jumbo_frame = 1;
+
+ /*
+ * if no max-pkt-len set, then use the
+ * default value ETHER_MAX_LEN
+ */
+ if (0 == getopt_long(argc, argvopt, "",
+ &lenopts, &option_index)) {
+ ret = parse_max_pkt_len(optarg);
+ if ((ret < 64) ||
+ (ret > MAX_JUMBO_PKT_LEN)) {
+ printf("invalid packet "
+ "length\n");
+ print_usage(prgname);
+ return -1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = ret;
+ }
+ printf("set jumbo frame max packet length "
+ "to %u\n",
+ (unsigned int)
+ port_conf.rxmode.max_rx_pkt_len);
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_RULE_IPV4,
+ sizeof(OPTION_RULE_IPV4)))
+ parm_config.rule_ipv4_name = optarg;
+
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_RULE_IPV6,
+ sizeof(OPTION_RULE_IPV6))) {
+ parm_config.rule_ipv6_name = optarg;
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ OPTION_SCALAR, sizeof(OPTION_SCALAR)))
+ parm_config.scalar = 1;
+
+
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+static int
+init_mem(unsigned nb_mbuf)
+{
+ int socketid;
+ unsigned lcore_id;
+ char s[64];
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ if (socketid >= NB_SOCKETS) {
+ rte_exit(EXIT_FAILURE,
+ "Socket %d of lcore %u is out of range %d\n",
+ socketid, lcore_id, NB_SOCKETS);
+ }
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] =
+ rte_pktmbuf_pool_create(s, nb_mbuf,
+ MEMPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ socketid);
+ if (pktmbuf_pool[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Cannot init mbuf pool on socket %d\n",
+ socketid);
+ else
+ printf("Allocated mbuf pool on socket %d\n",
+ socketid);
+ }
+ }
+ return 0;
+}
+
+/* Check the link status of all ports in up to 9s, and print the status at the end */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid, nb_rx_queue, queue, socketid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
+
+ if (check_lcore_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_exit(EXIT_FAILURE, "check_port_config failed\n");
+
+ /* Add ACL rules and route entries, build trie */
+ if (app_acl_init() < 0)
+ rte_exit(EXIT_FAILURE, "app_acl_init failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid);
+ fflush(stdout);
+
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)n_tx_queue);
+ ret = rte_eth_dev_configure(portid, nb_rx_queue,
+ (uint16_t)n_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+
+ /* init memory */
+ ret = init_mem(NB_MBUF);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_mem failed\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ /* Initialize TX buffers */
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (qconf->tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
+ }
+
+ /* init one TX queue per couple (lcore,port) */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = (uint8_t)
+ rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (port_conf.rxmode.jumbo_frame)
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socketid, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+
+ qconf->tx_port_id[qconf->n_tx_port] = portid;
+ qconf->n_tx_port++;
+ }
+ printf("\n");
+ }
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ qconf = &lcore_conf[lcore_id];
+ printf("\nInitializing rx queues on lcore %u ... ", lcore_id);
+ fflush(stdout);
+ /* init RX queues */
+ for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+
+ if (numa_on)
+ socketid = (uint8_t)
+ rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid, NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup: err=%d,"
+ "port=%d\n", ret, portid);
+ }
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/l3fwd-power/Makefile b/examples/l3fwd-power/Makefile
new file mode 100644
index 00000000..783772a7
--- /dev/null
+++ b/examples/l3fwd-power/Makefile
@@ -0,0 +1,63 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = l3fwd-power
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+endif
diff --git a/examples/l3fwd-power/main.c b/examples/l3fwd-power/main.c
new file mode 100644
index 00000000..cb42bfb9
--- /dev/null
+++ b/examples/l3fwd-power/main.c
@@ -0,0 +1,1760 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+#include <rte_timer.h>
+#include <rte_power.h>
+#include <rte_eal.h>
+#include <rte_spinlock.h>
+
+#define RTE_LOGTYPE_L3FWD_POWER RTE_LOGTYPE_USER1
+
+#define MAX_PKT_BURST 32
+
+#define MIN_ZERO_POLL_COUNT 10
+
+/* around 100 ms at 2 GHz */
+#define TIMER_RESOLUTION_CYCLES 200000000ULL
+/* 100 ms interval */
+#define TIMER_NUMBER_PER_SECOND 10
+/* 100000 us */
+#define SCALING_PERIOD (1000000/TIMER_NUMBER_PER_SECOND)
+#define SCALING_DOWN_TIME_RATIO_THRESHOLD 0.25
+
+#define APP_LOOKUP_EXACT_MATCH 0
+#define APP_LOOKUP_LPM 1
+#define DO_RFC_1812_CHECKS
+
+#ifndef APP_LOOKUP_METHOD
+#define APP_LOOKUP_METHOD APP_LOOKUP_LPM
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+#include <rte_hash.h>
+#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+#include <rte_lpm.h>
+#else
+#error "APP_LOOKUP_METHOD set to incorrect value"
+#endif
+
+#ifndef IPv6_BYTES
+#define IPv6_BYTES_FMT "%02x%02x:%02x%02x:%02x%02x:%02x%02x:"\
+ "%02x%02x:%02x%02x:%02x%02x:%02x%02x"
+#define IPv6_BYTES(addr) \
+ addr[0], addr[1], addr[2], addr[3], \
+ addr[4], addr[5], addr[6], addr[7], \
+ addr[8], addr[9], addr[10], addr[11],\
+ addr[12], addr[13],addr[14], addr[15]
+#endif
+
+#define MAX_JUMBO_PKT_LEN 9600
+
+#define IPV6_ADDR_LEN 16
+
+#define MEMPOOL_CACHE_SIZE 256
+
+/*
+ * This expression is used to calculate the number of mbufs needed depending on
+ * user input, taking into account memory for rx and tx hardware rings, cache
+ * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
+ * NB_MBUF never goes below a minimum value of 8192.
+ */
+
+#define NB_MBUF RTE_MAX ( \
+ (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
+ nb_ports*nb_lcores*MAX_PKT_BURST + \
+ nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
+ nb_lcores*MEMPOOL_CACHE_SIZE), \
+ (unsigned)8192)
+
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define NB_SOCKETS 8
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* spinlocks, one per port */
+static rte_spinlock_t locks[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+/* Ports are not set in promiscuous mode by default. */
+static int promiscuous_on = 0;
+/* NUMA is enabled by default. */
+static int numa_on = 1;
+
+enum freq_scale_hint_t
+{
+ FREQ_LOWER = -1,
+ FREQ_CURRENT = 0,
+ FREQ_HIGHER = 1,
+ FREQ_HIGHEST = 2
+};
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+ enum freq_scale_hint_t freq_up_hint;
+ uint32_t zero_rx_packet_count;
+ uint32_t idle_hint;
+} __rte_cache_aligned;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_RX_QUEUE_INTERRUPT_PER_PORT 16
+
+
+#define MAX_LCORE_PARAMS 1024
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+static struct lcore_params lcore_params_array_default[] = {
+ {0, 0, 2},
+ {0, 1, 2},
+ {0, 2, 2},
+ {1, 0, 2},
+ {1, 1, 2},
+ {1, 2, 2},
+ {2, 0, 2},
+ {3, 0, 3},
+ {3, 1, 3},
+};
+
+static struct lcore_params * lcore_params = lcore_params_array_default;
+static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
+ sizeof(lcore_params_array_default[0]);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_UDP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .intr_conf = {
+ .lsc = 1,
+ .rxq = 1,
+ },
+};
+
+static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
+
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC rte_jhash
+#endif
+
+struct ipv4_5tuple {
+ uint32_t ip_dst;
+ uint32_t ip_src;
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+struct ipv6_5tuple {
+ uint8_t ip_dst[IPV6_ADDR_LEN];
+ uint8_t ip_src[IPV6_ADDR_LEN];
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+struct ipv4_l3fwd_route {
+ struct ipv4_5tuple key;
+ uint8_t if_out;
+};
+
+struct ipv6_l3fwd_route {
+ struct ipv6_5tuple key;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
+ {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
+ {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
+ {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
+ {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
+};
+
+static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
+ {
+ {
+ {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
+ {0xfe, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x02, 0x1e, 0x67, 0xff, 0xfe, 0x0d, 0xb6, 0x0a},
+ 1, 10, IPPROTO_UDP
+ }, 4
+ },
+};
+
+typedef struct rte_hash lookup_struct_t;
+static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
+static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
+
+#define L3FWD_HASH_ENTRIES 1024
+
+#define IPV4_L3FWD_NUM_ROUTES \
+ (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
+
+#define IPV6_L3FWD_NUM_ROUTES \
+ (sizeof(ipv6_l3fwd_route_array) / sizeof(ipv6_l3fwd_route_array[0]))
+
+static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+struct ipv4_l3fwd_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
+ {IPv4(1,1,1,0), 24, 0},
+ {IPv4(2,1,1,0), 24, 1},
+ {IPv4(3,1,1,0), 24, 2},
+ {IPv4(4,1,1,0), 24, 3},
+ {IPv4(5,1,1,0), 24, 4},
+ {IPv4(6,1,1,0), 24, 5},
+ {IPv4(7,1,1,0), 24, 6},
+ {IPv4(8,1,1,0), 24, 7},
+};
+
+#define IPV4_L3FWD_NUM_ROUTES \
+ (sizeof(ipv4_l3fwd_route_array) / sizeof(ipv4_l3fwd_route_array[0]))
+
+#define IPV4_L3FWD_LPM_MAX_RULES 1024
+
+typedef struct rte_lpm lookup_struct_t;
+static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
+#endif
+
+struct lcore_conf {
+ uint16_t n_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t n_tx_port;
+ uint16_t tx_port_id[RTE_MAX_ETHPORTS];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+ lookup_struct_t * ipv4_lookup_struct;
+ lookup_struct_t * ipv6_lookup_struct;
+} __rte_cache_aligned;
+
+struct lcore_stats {
+ /* total sleep time in ms since last frequency scaling down */
+ uint32_t sleep_time;
+ /* number of long sleep recently */
+ uint32_t nb_long_sleep;
+ /* freq. scaling up trend */
+ uint32_t trend;
+ /* total packet processed recently */
+ uint64_t nb_rx_processed;
+ /* total iterations looped recently */
+ uint64_t nb_iteration_looped;
+ uint32_t padding[9];
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE] __rte_cache_aligned;
+static struct lcore_stats stats[RTE_MAX_LCORE] __rte_cache_aligned;
+static struct rte_timer power_timers[RTE_MAX_LCORE];
+
+static inline uint32_t power_idle_heuristic(uint32_t zero_rx_packet_count);
+static inline enum freq_scale_hint_t power_freq_scaleup_heuristic( \
+ unsigned lcore_id, uint8_t port_id, uint16_t queue_id);
+
+/* exit signal handler */
+static void
+signal_exit_now(int sigtype)
+{
+ unsigned lcore_id;
+ int ret;
+
+ if (sigtype == SIGINT) {
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ /* init power management library */
+ ret = rte_power_exit(lcore_id);
+ if (ret)
+ rte_exit(EXIT_FAILURE, "Power management "
+ "library de-initialization failed on "
+ "core%u\n", lcore_id);
+ }
+ }
+
+ rte_exit(EXIT_SUCCESS, "User forced exit\n");
+}
+
+/* Frequency scale-down timer callback */
+static void
+power_timer_cb(__attribute__((unused)) struct rte_timer *tim,
+ __attribute__((unused)) void *arg)
+{
+ uint64_t hz;
+ float sleep_time_ratio;
+ unsigned lcore_id = rte_lcore_id();
+
+	/* compute the share of the last scaling period spent sleeping */
+ sleep_time_ratio = (float)(stats[lcore_id].sleep_time) /
+ (float)SCALING_PERIOD;
+	/**
+	 * Scale down the frequency one step if the lcore has slept a lot.
+	 */
+ if (sleep_time_ratio >= SCALING_DOWN_TIME_RATIO_THRESHOLD) {
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
+ else if ( (unsigned)(stats[lcore_id].nb_rx_processed /
+ stats[lcore_id].nb_iteration_looped) < MAX_PKT_BURST) {
+		/**
+		 * Scale down one step if the average number of packets per
+		 * iteration is less than expected.
+		 */
+ if (rte_power_freq_down)
+ rte_power_freq_down(lcore_id);
+ }
+
+	/**
+	 * Re-arm the one-shot timer so that the callback keeps firing at a
+	 * roughly fixed interval.
+	 */
+ hz = rte_get_timer_hz();
+ rte_timer_reset(&power_timers[lcore_id], hz/TIMER_NUMBER_PER_SECOND,
+ SINGLE, lcore_id, power_timer_cb, NULL);
+
+ stats[lcore_id].nb_rx_processed = 0;
+ stats[lcore_id].nb_iteration_looped = 0;
+
+ stats[lcore_id].sleep_time = 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint32_t lcore_id;
+ struct lcore_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ rte_eth_tx_buffer(port, qconf->tx_queue_id[port],
+ qconf->tx_buffer[port], m);
+
+ return 0;
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static void
+print_ipv4_key(struct ipv4_5tuple key)
+{
+ printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, "
+ "proto = %d\n", (unsigned)key.ip_dst, (unsigned)key.ip_src,
+ key.port_dst, key.port_src, key.proto);
+}
+static void
+print_ipv6_key(struct ipv6_5tuple key)
+{
+ printf( "IP dst = " IPv6_BYTES_FMT ", IP src = " IPv6_BYTES_FMT ", "
+ "port dst = %d, port src = %d, proto = %d\n",
+ IPv6_BYTES(key.ip_dst), IPv6_BYTES(key.ip_src),
+ key.port_dst, key.port_src, key.proto);
+}
+
+static inline uint8_t
+get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
+ lookup_struct_t * ipv4_l3fwd_lookup_struct)
+{
+ struct ipv4_5tuple key;
+ struct tcp_hdr *tcp;
+ struct udp_hdr *udp;
+ int ret = 0;
+
+ key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+ key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
+ key.proto = ipv4_hdr->next_proto_id;
+
+ switch (ipv4_hdr->next_proto_id) {
+ case IPPROTO_TCP:
+ tcp = (struct tcp_hdr *)((unsigned char *)ipv4_hdr +
+ sizeof(struct ipv4_hdr));
+ key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
+ key.port_src = rte_be_to_cpu_16(tcp->src_port);
+ break;
+
+ case IPPROTO_UDP:
+ udp = (struct udp_hdr *)((unsigned char *)ipv4_hdr +
+ sizeof(struct ipv4_hdr));
+ key.port_dst = rte_be_to_cpu_16(udp->dst_port);
+ key.port_src = rte_be_to_cpu_16(udp->src_port);
+ break;
+
+ default:
+ key.port_dst = 0;
+ key.port_src = 0;
+ break;
+ }
+
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0)? portid : ipv4_l3fwd_out_if[ret]);
+}
+
+static inline uint8_t
+get_ipv6_dst_port(struct ipv6_hdr *ipv6_hdr, uint8_t portid,
+ lookup_struct_t *ipv6_l3fwd_lookup_struct)
+{
+ struct ipv6_5tuple key;
+ struct tcp_hdr *tcp;
+ struct udp_hdr *udp;
+ int ret = 0;
+
+ memcpy(key.ip_dst, ipv6_hdr->dst_addr, IPV6_ADDR_LEN);
+ memcpy(key.ip_src, ipv6_hdr->src_addr, IPV6_ADDR_LEN);
+
+ key.proto = ipv6_hdr->proto;
+
+ switch (ipv6_hdr->proto) {
+ case IPPROTO_TCP:
+ tcp = (struct tcp_hdr *)((unsigned char *) ipv6_hdr +
+ sizeof(struct ipv6_hdr));
+ key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
+ key.port_src = rte_be_to_cpu_16(tcp->src_port);
+ break;
+
+ case IPPROTO_UDP:
+ udp = (struct udp_hdr *)((unsigned char *) ipv6_hdr +
+ sizeof(struct ipv6_hdr));
+ key.port_dst = rte_be_to_cpu_16(udp->dst_port);
+ key.port_src = rte_be_to_cpu_16(udp->src_port);
+ break;
+
+ default:
+ key.port_dst = 0;
+ key.port_src = 0;
+ break;
+ }
+
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0)? portid : ipv6_l3fwd_out_if[ret]);
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+static inline uint8_t
+get_ipv4_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid,
+ lookup_struct_t *ipv4_l3fwd_lookup_struct)
+{
+ uint32_t next_hop;
+
+ return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
+ rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
+ next_hop : portid);
+}
+#endif
+
+static inline void
+l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ void *d_addr_bytes;
+ uint8_t dst_port;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ /* Handle IPv4 headers.*/
+ ipv4_hdr =
+ rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+#endif
+
+ dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
+ qconf->ipv4_lookup_struct);
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* 02:00:00:00:00:xx */
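+		/*
+		 * i.e. set the destination MAC to 02:00:00:00:00:<dst_port>
+		 * via one 64-bit little-endian store; the two extra bytes
+		 * written land in s_addr, which is overwritten just below.
+		 */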
+ d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)d_addr_bytes) =
+ 0x000000000002 + ((uint64_t)dst_port << 40);
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+#endif
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ /* Handle IPv6 headers.*/
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+ struct ipv6_hdr *ipv6_hdr;
+
+ ipv6_hdr =
+ rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
+ qconf->ipv6_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* 02:00:00:00:00:xx */
+ d_addr_bytes = &eth_hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)d_addr_bytes) =
+ 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+#else
+ /* We don't currently handle IPv6 packets in LPM mode. */
+ rte_pktmbuf_free(m);
+#endif
+ } else
+ rte_pktmbuf_free(m);
+
+}
+
+#define MINIMUM_SLEEP_TIME 1
+#define SUSPEND_THRESHOLD 300
+
+static inline uint32_t
+power_idle_heuristic(uint32_t zero_rx_packet_count)
+{
+	/*
+	 * If the queue has been polled empty fewer than SUSPEND_THRESHOLD
+	 * times in a row, sleep only for the minimum time (1 us).
+	 */
+	if (zero_rx_packet_count < SUSPEND_THRESHOLD)
+		return MINIMUM_SLEEP_TIME;
+	/*
+	 * Otherwise sleep for SUSPEND_THRESHOLD us, long enough to amortize
+	 * the latency of switching from C3/C6 back to C0.
+	 */
+	else
+		return SUSPEND_THRESHOLD;
+
+ return 0;
+}
+
+static inline enum freq_scale_hint_t
+power_freq_scaleup_heuristic(unsigned lcore_id,
+ uint8_t port_id,
+ uint16_t queue_id)
+{
+/**
+ * The HW Rx queue size is 128 descriptors by default; an Rx burst reads at
+ * most 32 entries per iteration.
+ */
+#define FREQ_GEAR1_RX_PACKET_THRESHOLD MAX_PKT_BURST
+#define FREQ_GEAR2_RX_PACKET_THRESHOLD (MAX_PKT_BURST*2)
+#define FREQ_GEAR3_RX_PACKET_THRESHOLD (MAX_PKT_BURST*3)
+#define FREQ_UP_TREND1_ACC 1
+#define FREQ_UP_TREND2_ACC 100
+#define FREQ_UP_THRESHOLD 10000
+
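+	/*
+	 * If the descriptor three bursts ahead in the RX ring is already done
+	 * (a large backlog has built up), jump straight to the highest
+	 * frequency; otherwise accumulate a trend score weighted by the
+	 * backlog depth and request one step up once it crosses
+	 * FREQ_UP_THRESHOLD.
+	 */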
+ if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
+ FREQ_GEAR3_RX_PACKET_THRESHOLD) > 0)) {
+ stats[lcore_id].trend = 0;
+ return FREQ_HIGHEST;
+ } else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
+ FREQ_GEAR2_RX_PACKET_THRESHOLD) > 0))
+ stats[lcore_id].trend += FREQ_UP_TREND2_ACC;
+ else if (likely(rte_eth_rx_descriptor_done(port_id, queue_id,
+ FREQ_GEAR1_RX_PACKET_THRESHOLD) > 0))
+ stats[lcore_id].trend += FREQ_UP_TREND1_ACC;
+
+ if (likely(stats[lcore_id].trend > FREQ_UP_THRESHOLD)) {
+ stats[lcore_id].trend = 0;
+ return FREQ_HIGHER;
+ }
+
+ return FREQ_CURRENT;
+}
+
+/**
+ * Force the polling thread to sleep until a one-shot RX interrupt triggers.
+ * @param num
+ *  Number of RX queues (epoll events) to wait on.
+ * @return
+ *  0 on success
+ */
+static int
+sleep_until_rx_interrupt(int num)
+{
+ struct rte_epoll_event event[num];
+ int n, i;
+ uint8_t port_id, queue_id;
+ void *data;
+
+ RTE_LOG(INFO, L3FWD_POWER,
+ "lcore %u sleeps until interrupt triggers\n",
+ rte_lcore_id());
+
+ n = rte_epoll_wait(RTE_EPOLL_PER_THREAD, event, num, -1);
+ for (i = 0; i < n; i++) {
+ data = event[i].epdata.data;
+ port_id = ((uintptr_t)data) >> CHAR_BIT;
+ queue_id = ((uintptr_t)data) &
+ RTE_LEN2MASK(CHAR_BIT, uint8_t);
+ rte_eth_dev_rx_intr_disable(port_id, queue_id);
+ RTE_LOG(INFO, L3FWD_POWER,
+			"lcore %u is woken up by rx interrupt on"
+ " port %d queue %d\n",
+ rte_lcore_id(), port_id, queue_id);
+ }
+
+ return 0;
+}
+
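+/*
+ * Re-enable the one-shot RX interrupt on every queue polled by this lcore.
+ * The per-port spinlock serializes enable calls from lcores sharing a port.
+ */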
+static void turn_on_intr(struct lcore_conf *qconf)
+{
+ int i;
+ struct lcore_rx_queue *rx_queue;
+ uint8_t port_id, queue_id;
+
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+ rx_queue = &(qconf->rx_queue_list[i]);
+ port_id = rx_queue->port_id;
+ queue_id = rx_queue->queue_id;
+
+ rte_spinlock_lock(&(locks[port_id]));
+ rte_eth_dev_rx_intr_enable(port_id, queue_id);
+ rte_spinlock_unlock(&(locks[port_id]));
+ }
+}
+
+static int event_register(struct lcore_conf *qconf)
+{
+ struct lcore_rx_queue *rx_queue;
+ uint8_t portid, queueid;
+ uint32_t data;
+ int ret;
+ int i;
+
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+ rx_queue = &(qconf->rx_queue_list[i]);
+ portid = rx_queue->port_id;
+ queueid = rx_queue->queue_id;
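+		/* pack the port id into the upper byte and the queue id into
+		 * the lower byte of the epoll user data word
+		 */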
+ data = portid << CHAR_BIT | queueid;
+
+ ret = rte_eth_dev_rx_intr_ctl_q(portid, queueid,
+ RTE_EPOLL_PER_THREAD,
+ RTE_INTR_EVENT_ADD,
+ (void *)((uintptr_t)data));
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/* main processing loop */
+static int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ uint64_t prev_tsc_power = 0, cur_tsc_power, diff_tsc_power;
+ int i, j, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
+ struct lcore_rx_queue *rx_queue;
+ enum freq_scale_hint_t lcore_scaleup_hint;
+ uint32_t lcore_rx_idle_count = 0;
+ uint32_t lcore_idle_hint = 0;
+ int intr_en = 0;
+
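+	/* convert the TX drain period (~100 us) into TSC cycles, rounding up */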
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD_POWER, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD_POWER, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD_POWER, " -- lcoreid=%u portid=%hhu "
+ "rxqueueid=%hhu\n", lcore_id, portid, queueid);
+ }
+
+ /* add into event wait list */
+ if (event_register(qconf) == 0)
+ intr_en = 1;
+ else
+		RTE_LOG(INFO, L3FWD_POWER, "RX interrupt will not be enabled.\n");
+
+ while (1) {
+ stats[lcore_id].nb_iteration_looped++;
+
+ cur_tsc = rte_rdtsc();
+ cur_tsc_power = cur_tsc;
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+ for (i = 0; i < qconf->n_tx_port; ++i) {
+ portid = qconf->tx_port_id[i];
+ rte_eth_tx_buffer_flush(portid,
+ qconf->tx_queue_id[portid],
+ qconf->tx_buffer[portid]);
+ }
+ prev_tsc = cur_tsc;
+ }
+
+ diff_tsc_power = cur_tsc_power - prev_tsc_power;
+ if (diff_tsc_power > TIMER_RESOLUTION_CYCLES) {
+ rte_timer_manage();
+ prev_tsc_power = cur_tsc_power;
+ }
+
+start_rx:
+ /*
+ * Read packet from RX queues
+ */
+ lcore_scaleup_hint = FREQ_CURRENT;
+ lcore_rx_idle_count = 0;
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+ rx_queue = &(qconf->rx_queue_list[i]);
+ rx_queue->idle_hint = 0;
+ portid = rx_queue->port_id;
+ queueid = rx_queue->queue_id;
+
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+ MAX_PKT_BURST);
+
+ stats[lcore_id].nb_rx_processed += nb_rx;
+ if (unlikely(nb_rx == 0)) {
+				/**
+				 * No packets received from this rx queue;
+				 * try to sleep for a while, forcing the CPU
+				 * to enter deeper C-states.
+				 */
+ rx_queue->zero_rx_packet_count++;
+
+ if (rx_queue->zero_rx_packet_count <=
+ MIN_ZERO_POLL_COUNT)
+ continue;
+
+ rx_queue->idle_hint = power_idle_heuristic(\
+ rx_queue->zero_rx_packet_count);
+ lcore_rx_idle_count++;
+ } else {
+ rx_queue->zero_rx_packet_count = 0;
+
+				/**
+				 * Do not scale up the frequency immediately,
+				 * as the user-to-kernel-space call is costly
+				 * and might impact I/O for the packets just
+				 * received.
+				 */
+ rx_queue->freq_up_hint =
+ power_freq_scaleup_heuristic(lcore_id,
+ portid, queueid);
+ }
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_burst[j], void *));
+ }
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_simple_forward(pkts_burst[j], portid,
+ qconf);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++) {
+ l3fwd_simple_forward(pkts_burst[j], portid,
+ qconf);
+ }
+ }
+
+ if (likely(lcore_rx_idle_count != qconf->n_rx_queue)) {
+ for (i = 1, lcore_scaleup_hint =
+ qconf->rx_queue_list[0].freq_up_hint;
+ i < qconf->n_rx_queue; ++i) {
+ rx_queue = &(qconf->rx_queue_list[i]);
+ if (rx_queue->freq_up_hint >
+ lcore_scaleup_hint)
+ lcore_scaleup_hint =
+ rx_queue->freq_up_hint;
+ }
+
+ if (lcore_scaleup_hint == FREQ_HIGHEST) {
+ if (rte_power_freq_max)
+ rte_power_freq_max(lcore_id);
+ } else if (lcore_scaleup_hint == FREQ_HIGHER) {
+ if (rte_power_freq_up)
+ rte_power_freq_up(lcore_id);
+ }
+ } else {
+			/**
+			 * All Rx queues have been empty in recent consecutive
+			 * polls; sleep conservatively, i.e. as little as
+			 * possible.
+			 */
+ for (i = 1, lcore_idle_hint =
+ qconf->rx_queue_list[0].idle_hint;
+ i < qconf->n_rx_queue; ++i) {
+ rx_queue = &(qconf->rx_queue_list[i]);
+ if (rx_queue->idle_hint < lcore_idle_hint)
+ lcore_idle_hint = rx_queue->idle_hint;
+ }
+
+ if (lcore_idle_hint < SUSPEND_THRESHOLD)
+				/**
+				 * For short sleeps, execute the "pause"
+				 * instruction to avoid a context switch,
+				 * which generally takes hundreds of
+				 * microseconds.
+				 */
+ rte_delay_us(lcore_idle_hint);
+ else {
+				/* suspend until an rx interrupt triggers */
+ if (intr_en) {
+ turn_on_intr(qconf);
+ sleep_until_rx_interrupt(
+ qconf->n_rx_queue);
+ }
+ /* start receiving packets immediately */
+ goto start_rx;
+ }
+ stats[lcore_id].sleep_time += lcore_idle_hint;
+ }
+ }
+}
+
+static int
+check_lcore_params(void)
+{
+ uint8_t queue, lcore;
+ uint16_t i;
+ int socketid;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ queue = lcore_params[i].queue_id;
+ if (queue >= MAX_RX_QUEUE_PER_PORT) {
+ printf("invalid queue number: %hhu\n", queue);
+ return -1;
+ }
+ lcore = lcore_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in lcore "
+ "mask\n", lcore);
+ return -1;
+ }
+		socketid = rte_lcore_to_socket_id(lcore);
+		if (socketid != 0 && numa_on == 0) {
+			printf("warning: lcore %hhu is on socket %d with numa "
+				"off\n", lcore, socketid);
+		}
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ portid = lcore_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n",
+ portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n",
+ portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
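+/*
+ * Number of RX queues to configure on a port: the highest queue id referenced
+ * for that port in the lcore configuration, plus one.
+ */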
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port &&
+ lcore_params[i].queue_id > queue)
+ queue = lcore_params[i].queue_id;
+ }
+ return (uint8_t)(++queue);
+}
+
+static int
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)lcore);
+ return -1;
+ } else {
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].n_rx_queue++;
+ }
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf ("%s [EAL options] -- -p PORTMASK -P"
+		" [--config (port,queue,lcore)[,(port,queue,lcore)]]"
+ " [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -P : enable promiscuous mode\n"
+ " --config (port,queue,lcore): rx queues configuration\n"
+ " --no-numa: optional, disable numa awareness\n"
+		" --enable-jumbo: enable jumbo frames;"
+		" the maximum packet length is PKTLEN in decimal (64-9600)\n",
+ prgname);
+}
+
+static int parse_max_pkt_len(const char *pktlen)
+{
+ char *end = NULL;
+ unsigned long len;
+
+ /* parse decimal string */
+ len = strtoul(pktlen, &end, 10);
+ if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ return len;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p0,'(')) != NULL) {
+ ++p;
+ if((p0 = strchr(p,')')) == NULL)
+ return -1;
+
+ size = p0 - p;
+ if(size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') !=
+ _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++){
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] >
+ 255)
+ return -1;
+ }
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id =
+ (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id =
+ (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"config", 1, 0, 0},
+ {"no-numa", 0, 0, 0},
+ {"enable-jumbo", 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:P",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'P':
+ printf("Promiscuous mode selected\n");
+ promiscuous_on = 1;
+ break;
+
+ /* long options */
+ case 0:
+ if (!strncmp(lgopts[option_index].name, "config", 6)) {
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("invalid config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ "no-numa", 7)) {
+				printf("numa is disabled\n");
+ numa_on = 0;
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ "enable-jumbo", 12)) {
+ struct option lenopts =
+ {"max-pkt-len", required_argument, \
+ 0, 0};
+
+				printf("jumbo frame is enabled\n");
+ port_conf.rxmode.jumbo_frame = 1;
+
+ /**
+ * if no max-pkt-len set, use the default value
+ * ETHER_MAX_LEN
+ */
+ if (0 == getopt_long(argc, argvopt, "",
+ &lenopts, &option_index)) {
+ ret = parse_max_pkt_len(optarg);
+ if ((ret < 64) ||
+ (ret > MAX_JUMBO_PKT_LEN)){
+ printf("invalid packet "
+ "length\n");
+ print_usage(prgname);
+ return -1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = ret;
+ }
+ printf("set jumbo frame "
+ "max packet length to %u\n",
+ (unsigned int)port_conf.rxmode.max_rx_pkt_len);
+ }
+
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static void
+setup_hash(int socketid)
+{
+ struct rte_hash_parameters ipv4_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(struct ipv4_5tuple),
+ .hash_func = DEFAULT_HASH_FUNC,
+ .hash_func_init_val = 0,
+ };
+
+ struct rte_hash_parameters ipv6_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(struct ipv6_5tuple),
+ .hash_func = DEFAULT_HASH_FUNC,
+ .hash_func_init_val = 0,
+ };
+
+ unsigned i;
+ int ret;
+ char s[64];
+
+ /* create ipv4 hash */
+ snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+ ipv4_l3fwd_hash_params.name = s;
+ ipv4_l3fwd_hash_params.socket_id = socketid;
+ ipv4_l3fwd_lookup_struct[socketid] =
+ rte_hash_create(&ipv4_l3fwd_hash_params);
+ if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
+ "socket %d\n", socketid);
+
+ /* create ipv6 hash */
+ snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+ ipv6_l3fwd_hash_params.name = s;
+ ipv6_l3fwd_hash_params.socket_id = socketid;
+ ipv6_l3fwd_lookup_struct[socketid] =
+ rte_hash_create(&ipv6_l3fwd_hash_params);
+ if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
+ "socket %d\n", socketid);
+
+
+ /* populate the ipv4 hash */
+ for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
+ ret = rte_hash_add_key (ipv4_l3fwd_lookup_struct[socketid],
+ (void *) &ipv4_l3fwd_route_array[i].key);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
+ "l3fwd hash on socket %d\n", i, socketid);
+ }
+ ipv4_l3fwd_out_if[ret] = ipv4_l3fwd_route_array[i].if_out;
+ printf("Hash: Adding key\n");
+ print_ipv4_key(ipv4_l3fwd_route_array[i].key);
+ }
+
+ /* populate the ipv6 hash */
+ for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
+ ret = rte_hash_add_key (ipv6_l3fwd_lookup_struct[socketid],
+ (void *) &ipv6_l3fwd_route_array[i].key);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
+ "l3fwd hash on socket %d\n", i, socketid);
+ }
+ ipv6_l3fwd_out_if[ret] = ipv6_l3fwd_route_array[i].if_out;
+ printf("Hash: Adding key\n");
+ print_ipv6_key(ipv6_l3fwd_route_array[i].key);
+ }
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+static void
+setup_lpm(int socketid)
+{
+ unsigned i;
+ int ret;
+ char s[64];
+
+ /* create the LPM table */
+ struct rte_lpm_config lpm_ipv4_config;
+
+ lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+ lpm_ipv4_config.number_tbl8s = 256;
+ lpm_ipv4_config.flags = 0;
+
+ snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
+ ipv4_l3fwd_lookup_struct[socketid] =
+ rte_lpm_create(s, socketid, &lpm_ipv4_config);
+ if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
+ " on socket %d\n", socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
+ ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
+ ipv4_l3fwd_route_array[i].ip,
+ ipv4_l3fwd_route_array[i].depth,
+ ipv4_l3fwd_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
+ "l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route 0x%08x / %d (%d)\n",
+ (unsigned)ipv4_l3fwd_route_array[i].ip,
+ ipv4_l3fwd_route_array[i].depth,
+ ipv4_l3fwd_route_array[i].if_out);
+ }
+}
+#endif
+
+static int
+init_mem(unsigned nb_mbuf)
+{
+ struct lcore_conf *qconf;
+ int socketid;
+ unsigned lcore_id;
+ char s[64];
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ if (socketid >= NB_SOCKETS) {
+ rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is "
+ "out of range %d\n", socketid,
+ lcore_id, NB_SOCKETS);
+ }
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] =
+ rte_pktmbuf_pool_create(s, nb_mbuf,
+ MEMPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ socketid);
+ if (pktmbuf_pool[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Cannot init mbuf pool on socket %d\n",
+ socketid);
+ else
+ printf("Allocated mbuf pool on socket %d\n",
+ socketid);
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+ setup_lpm(socketid);
+#else
+ setup_hash(socketid);
+#endif
+ }
+ qconf = &lcore_conf[lcore_id];
+ qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+ qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
+#endif
+ }
+ return 0;
+}
+
+/* Check the link status of all ports in up to 9 s, and finally print it */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+					("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id;
+ uint64_t hz;
+ uint32_t n_tx_queue, nb_lcores;
+ uint32_t dev_rxq_num, dev_txq_num;
+ uint8_t portid, nb_rx_queue, queue, socketid;
+
+	/* catch SIGINT and restore the original cpufreq governor */
+ signal(SIGINT, signal_exit_now);
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+	/* init RTE timer library, to be used later */
+ rte_timer_subsystem_init();
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
+
+ if (check_lcore_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_exit(EXIT_FAILURE, "check_port_config failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid );
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ dev_rxq_num = dev_info.max_rx_queues;
+ dev_txq_num = dev_info.max_tx_queues;
+
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ if (nb_rx_queue > dev_rxq_num)
+ rte_exit(EXIT_FAILURE,
+				"Cannot configure non-existent rxq: "
+ "port=%d\n", portid);
+
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > dev_txq_num)
+ n_tx_queue = dev_txq_num;
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)n_tx_queue );
+ ret = rte_eth_dev_configure(portid, nb_rx_queue,
+ (uint16_t)n_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: "
+ "err=%d, port=%d\n", ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+
+ /* init memory */
+ ret = init_mem(NB_MBUF);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_mem failed\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ /* Initialize TX buffers */
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (qconf->tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Can't allocate tx buffer for port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(qconf->tx_buffer[portid], MAX_PKT_BURST);
+ }
+
+		/* init one TX queue per (lcore, port) pair */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (queueid >= dev_txq_num)
+ continue;
+
+ if (numa_on)
+ socketid = \
+ (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (port_conf.rxmode.jumbo_frame)
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socketid, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+
+ qconf->tx_port_id[qconf->n_tx_port] = portid;
+ qconf->n_tx_port++;
+ }
+ printf("\n");
+ }
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ /* init power management library */
+ ret = rte_power_init(lcore_id);
+ if (ret)
+ RTE_LOG(ERR, POWER,
+ "Library initialization failed on core %u\n", lcore_id);
+
+ /* init timer structures for each enabled lcore */
+ rte_timer_init(&power_timers[lcore_id]);
+ hz = rte_get_timer_hz();
+ rte_timer_reset(&power_timers[lcore_id],
+ hz/TIMER_NUMBER_PER_SECOND, SINGLE, lcore_id,
+ power_timer_cb, NULL);
+
+ qconf = &lcore_conf[lcore_id];
+ printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
+ fflush(stdout);
+ /* init RX queues */
+ for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+
+ if (numa_on)
+ socketid = \
+ (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid, NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+ }
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ continue;
+ }
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, "
+ "port=%d\n", ret, portid);
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ /* initialize spinlock for each port */
+ rte_spinlock_init(&(locks[portid]));
+ }
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/l3fwd-vf/Makefile b/examples/l3fwd-vf/Makefile
new file mode 100644
index 00000000..d97611cf
--- /dev/null
+++ b/examples/l3fwd-vf/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l3fwd-vf
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3 $(USER_FLAGS)
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l3fwd-vf/main.c b/examples/l3fwd-vf/main.c
new file mode 100644
index 00000000..034c22a7
--- /dev/null
+++ b/examples/l3fwd-vf/main.c
@@ -0,0 +1,1097 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_spinlock.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+
+#define APP_LOOKUP_EXACT_MATCH 0
+#define APP_LOOKUP_LPM 1
+#define DO_RFC_1812_CHECKS
+
+//#define APP_LOOKUP_METHOD APP_LOOKUP_EXACT_MATCH
+#ifndef APP_LOOKUP_METHOD
+#define APP_LOOKUP_METHOD APP_LOOKUP_LPM
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+#include <rte_hash.h>
+#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+#include <rte_lpm.h>
+#else
+#error "APP_LOOKUP_METHOD set to incorrect value"
+#endif
+
+#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
+
+#define MEMPOOL_CACHE_SIZE 256
+
+/*
+ * This expression calculates the number of mbufs needed from the user input,
+ * accounting for the rx/tx hardware rings, the cache per lcore and the mtable
+ * per port per lcore. RTE_MAX ensures NB_MBUF never goes below 8192.
+ */
+
+#define NB_MBUF RTE_MAX ( \
+ (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
+ nb_ports*nb_lcores*MAX_PKT_BURST + \
+ nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
+ nb_lcores*MEMPOOL_CACHE_SIZE), \
+ (unsigned)8192)
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
+
+/*
+ * These default values are optimized for use with the Intel(R) 82599 10 GbE
+ * Controller and the DPDK ixgbe PMD. Consider using other values for other
+ * network controllers and/or network drivers.
+ */
+#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define NB_SOCKETS 8
+
+#define SOCKET0 0
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+static int numa_on = 1; /**< NUMA is enabled by default. */
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+} __rte_cache_aligned;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 1
+#define MAX_RX_QUEUE_PER_PORT 1
+
+#define MAX_LCORE_PARAMS 1024
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+static struct lcore_params lcore_params_array_default[] = {
+ {0, 0, 2},
+ {0, 1, 2},
+ {0, 2, 2},
+ {1, 0, 2},
+ {1, 1, 2},
+ {1, 2, 2},
+ {2, 0, 2},
+ {3, 0, 3},
+ {3, 1, 3},
+};
+
+static struct lcore_params * lcore_params = lcore_params_array_default;
+static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
+ sizeof(lcore_params_array_default[0]);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
+
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC rte_jhash
+#endif
+
+struct ipv4_5tuple {
+ uint32_t ip_dst;
+ uint32_t ip_src;
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+struct l3fwd_route {
+ struct ipv4_5tuple key;
+ uint8_t if_out;
+};
+
+static struct l3fwd_route l3fwd_route_array[] = {
+ {{IPv4(100,10,0,1), IPv4(200,10,0,1), 101, 11, IPPROTO_TCP}, 0},
+ {{IPv4(100,20,0,2), IPv4(200,20,0,2), 102, 12, IPPROTO_TCP}, 1},
+ {{IPv4(100,30,0,3), IPv4(200,30,0,3), 103, 13, IPPROTO_TCP}, 2},
+ {{IPv4(100,40,0,4), IPv4(200,40,0,4), 104, 14, IPPROTO_TCP}, 3},
+};
+
+typedef struct rte_hash lookup_struct_t;
+static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];
+
+#define L3FWD_HASH_ENTRIES 1024
+struct rte_hash_parameters l3fwd_hash_params = {
+ .name = "l3fwd_hash_0",
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(struct ipv4_5tuple),
+ .hash_func = DEFAULT_HASH_FUNC,
+ .hash_func_init_val = 0,
+ .socket_id = SOCKET0,
+};
+
+#define L3FWD_NUM_ROUTES \
+ (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))
+
+static uint8_t l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+struct l3fwd_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct l3fwd_route l3fwd_route_array[] = {
+ {IPv4(1,1,1,0), 24, 0},
+ {IPv4(2,1,1,0), 24, 1},
+ {IPv4(3,1,1,0), 24, 2},
+ {IPv4(4,1,1,0), 24, 3},
+ {IPv4(5,1,1,0), 24, 4},
+ {IPv4(6,1,1,0), 24, 5},
+ {IPv4(7,1,1,0), 24, 6},
+ {IPv4(8,1,1,0), 24, 7},
+};
+
+#define L3FWD_NUM_ROUTES \
+ (sizeof(l3fwd_route_array) / sizeof(l3fwd_route_array[0]))
+
+#define L3FWD_LPM_MAX_RULES 1024
+
+typedef struct rte_lpm lookup_struct_t;
+static lookup_struct_t *l3fwd_lookup_struct[NB_SOCKETS];
+#endif
+
+struct lcore_conf {
+ uint16_t n_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t tx_queue_id;
+ struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+ lookup_struct_t * lookup_struct;
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+static rte_spinlock_t spinlock_conf[RTE_MAX_ETHPORTS] = {RTE_SPINLOCK_INITIALIZER};
+/* Send burst of packets on an output interface */
+static inline int
+send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ int ret;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id;
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ rte_spinlock_lock(&spinlock_conf[port]);
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ rte_spinlock_unlock(&spinlock_conf[port]);
+
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint32_t lcore_id;
+ uint16_t len;
+ struct lcore_conf *qconf;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_conf[lcore_id];
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static void
+print_key(struct ipv4_5tuple key)
+{
+ printf("IP dst = %08x, IP src = %08x, port dst = %d, port src = %d, proto = %d\n",
+ (unsigned)key.ip_dst, (unsigned)key.ip_src, key.port_dst, key.port_src, key.proto);
+}
+
+static inline uint8_t
+get_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
+{
+ struct ipv4_5tuple key;
+ struct tcp_hdr *tcp;
+ struct udp_hdr *udp;
+ int ret = 0;
+
+ key.ip_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+ key.ip_src = rte_be_to_cpu_32(ipv4_hdr->src_addr);
+ key.proto = ipv4_hdr->next_proto_id;
+
+ switch (ipv4_hdr->next_proto_id) {
+ case IPPROTO_TCP:
+ tcp = (struct tcp_hdr *)((unsigned char *) ipv4_hdr +
+ sizeof(struct ipv4_hdr));
+ key.port_dst = rte_be_to_cpu_16(tcp->dst_port);
+ key.port_src = rte_be_to_cpu_16(tcp->src_port);
+ break;
+
+ case IPPROTO_UDP:
+ udp = (struct udp_hdr *)((unsigned char *) ipv4_hdr +
+ sizeof(struct ipv4_hdr));
+ key.port_dst = rte_be_to_cpu_16(udp->dst_port);
+ key.port_src = rte_be_to_cpu_16(udp->src_port);
+ break;
+
+ default:
+ key.port_dst = 0;
+ key.port_src = 0;
+ }
+
+ /* Find destination port */
+ ret = rte_hash_lookup(l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0)? portid : l3fwd_out_if[ret]);
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+static inline uint8_t
+get_dst_port(struct ipv4_hdr *ipv4_hdr, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
+{
+ uint32_t next_hop;
+
+ return (uint8_t) ((rte_lpm_lookup(l3fwd_lookup_struct,
+ rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop) == 0)?
+ next_hop : portid);
+}
+#endif
+
+static inline void
+l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid, lookup_struct_t * l3fwd_lookup_struct)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ void *tmp;
+ uint8_t dst_port;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+#endif
+
+ dst_port = get_dst_port(ipv4_hdr, portid, l3fwd_lookup_struct);
+ if (dst_port >= RTE_MAX_ETHPORTS || (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth_hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+#endif
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+
+}
+
+/* main processing loop */
+static int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int i, j, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
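+ /* TSC ticks in roughly BURST_TX_DRAIN_US microseconds, used to pace TX drains */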
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n", lcore_id,
+ portid, queueid);
+ }
+
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ /*
+ * This could be optimized (use queueid instead of
+ * portid), but it is not called so often
+ */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (qconf->tx_mbufs[portid].len == 0)
+ continue;
+ send_burst(&lcore_conf[lcore_id],
+ qconf->tx_mbufs[portid].len,
+ portid);
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst, MAX_PKT_BURST);
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(
+ pkts_burst[j], void *));
+ }
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++) {
+ l3fwd_simple_forward(pkts_burst[j], portid, qconf->lookup_struct);
+ }
+ }
+ }
+}
+
+static int
+check_lcore_params(void)
+{
+ uint8_t queue, lcore;
+ uint16_t i;
+ int socketid;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ queue = lcore_params[i].queue_id;
+ if (queue >= MAX_RX_QUEUE_PER_PORT) {
+ printf("invalid queue number: %hhu\n", queue);
+ return -1;
+ }
+ lcore = lcore_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
+ return -1;
+ }
+ socketid = rte_lcore_to_socket_id(lcore);
+ if ((socketid != 0) && (numa_on == 0)) {
+ printf("warning: lcore %hhu is on socket %d with numa off\n",
+ lcore, socketid);
+ }
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ portid = lcore_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
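+/*
+ * Number of RX queues used for a port: the highest queue id that
+ * --config assigns to this port, plus one.
+ */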
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port && lcore_params[i].queue_id > queue)
+ queue = lcore_params[i].queue_id;
+ }
+ return (uint8_t)(++queue);
+}
+
+static int
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)lcore);
+ return -1;
+ } else {
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].n_rx_queue++;
+ }
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf ("%s [EAL options] -- -p PORTMASK"
+ " [--config (port,queue,lcore)[,(port,queue,lcore]]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " --config (port,queue,lcore): rx queues configuration\n"
+ " --no-numa: optional, disable numa awareness\n",
+ prgname);
+}
+
+/* Custom handling of signals to handle process termination */
+static void
+signal_handler(int signum)
+{
+ uint8_t portid;
+ uint8_t nb_ports = rte_eth_dev_count();
+
+ /* When we receive a SIGINT signal */
+ if (signum == SIGINT) {
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ rte_eth_dev_close(portid);
+ }
+ }
+ rte_exit(EXIT_SUCCESS, "\n User forced exit\n");
+}
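+
+/* Parse the hexadecimal port mask, e.g. "-p 0x3" selects ports 0 and 1. */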
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
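+/*
+ * Parse the --config option, a list of (port,queue,lcore) triples.
+ * For example (an illustrative value, not taken from this file):
+ *   --config "(0,0,1),(0,1,2),(1,0,1)"
+ * maps port 0/queue 0 and port 1/queue 0 to lcore 1, and port 0/queue 1
+ * to lcore 2.
+ */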
+static int
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p0,'(')) != NULL) {
+ ++p;
+ if((p0 = strchr(p,')')) == NULL)
+ return -1;
+
+ size = p0 - p;
+ if(size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++){
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id = (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id = (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id = (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"config", 1, 0, 0},
+ {"no-numa", 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ if (!strcmp(lgopts[option_index].name, "config")) {
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("invalid config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strcmp(lgopts[option_index].name, "no-numa")) {
+ printf("numa is disabled \n");
+ numa_on = 0;
+ }
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static void
+setup_hash(int socketid)
+{
+ unsigned i;
+ int ret;
+ char s[64];
+
+ /* create hashes */
+ snprintf(s, sizeof(s), "l3fwd_hash_%d", socketid);
+ l3fwd_hash_params.name = s;
+ l3fwd_hash_params.socket_id = socketid;
+ l3fwd_lookup_struct[socketid] = rte_hash_create(&l3fwd_hash_params);
+ if (l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
+ "socket %d\n", socketid);
+
+ /* populate the hash */
+ for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
+ ret = rte_hash_add_key (l3fwd_lookup_struct[socketid],
+ (void *) &l3fwd_route_array[i].key);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the"
+ "l3fwd hash on socket %d\n", i, socketid);
+ }
+ l3fwd_out_if[ret] = l3fwd_route_array[i].if_out;
+ printf("Hash: Adding key\n");
+ print_key(l3fwd_route_array[i].key);
+ }
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+static void
+setup_lpm(int socketid)
+{
+ unsigned i;
+ int ret;
+ char s[64];
+
+ struct rte_lpm_config lpm_ipv4_config;
+
+ lpm_ipv4_config.max_rules = L3FWD_LPM_MAX_RULES;
+ lpm_ipv4_config.number_tbl8s = 256;
+ lpm_ipv4_config.flags = 0;
+
+ /* create the LPM table */
+ snprintf(s, sizeof(s), "L3FWD_LPM_%d", socketid);
+ l3fwd_lookup_struct[socketid] =
+ rte_lpm_create(s, socketid, &lpm_ipv4_config);
+ if (l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
+ " on socket %d\n", socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < L3FWD_NUM_ROUTES; i++) {
+ ret = rte_lpm_add(l3fwd_lookup_struct[socketid],
+ l3fwd_route_array[i].ip,
+ l3fwd_route_array[i].depth,
+ l3fwd_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
+ "l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route 0x%08x / %d (%d)\n",
+ (unsigned)l3fwd_route_array[i].ip,
+ l3fwd_route_array[i].depth,
+ l3fwd_route_array[i].if_out);
+ }
+}
+#endif
+
+static int
+init_mem(unsigned nb_mbuf)
+{
+ struct lcore_conf *qconf;
+ int socketid;
+ unsigned lcore_id;
+ char s[64];
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ if (socketid >= NB_SOCKETS) {
+ rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
+ socketid, lcore_id, NB_SOCKETS);
+ }
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] = rte_pktmbuf_pool_create(s,
+ nb_mbuf, MEMPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+ if (pktmbuf_pool[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool on socket %d\n", socketid);
+ else
+ printf("Allocated mbuf pool on socket %d\n", socketid);
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+ setup_lpm(socketid);
+#else
+ setup_hash(socketid);
+#endif
+ }
+ qconf = &lcore_conf[lcore_id];
+ qconf->lookup_struct = l3fwd_lookup_struct[socketid];
+ }
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id;
+ uint32_t nb_lcores;
+ uint16_t n_tx_queue;
+ uint8_t portid, nb_rx_queue, queue, socketid;
+
+ signal(SIGINT, signal_handler);
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L3FWD-VF parameters\n");
+
+ if (check_lcore_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_exit(EXIT_FAILURE, "check_port_config failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid );
+ fflush(stdout);
+
+ /* must always be 1: each port uses a single RX queue in this example */
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)1 );
+ ret = rte_eth_dev_configure(portid, nb_rx_queue, n_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+
+ ret = init_mem(NB_MBUF);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_mem failed\n");
+
+ /* init one TX queue */
+ socketid = (uint8_t)rte_lcore_to_socket_id(rte_get_master_lcore());
+
+ printf("txq=%d,%d,%d ", portid, 0, socketid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (port_conf.rxmode.jumbo_frame)
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ socketid, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ printf("\n");
+ }
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_queue_id = 0;
+
+ printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
+ fflush(stdout);
+ /* init RX queues */
+ for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+
+ if (numa_on)
+ socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid, NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d,"
+ "port=%d\n", ret, portid);
+ }
+ }
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ continue;
+ }
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ printf("done: Port %d\n", portid);
+
+ }
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/l3fwd/Makefile b/examples/l3fwd/Makefile
new file mode 100644
index 00000000..5ce0ce05
--- /dev/null
+++ b/examples/l3fwd/Makefile
@@ -0,0 +1,51 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l3fwd
+
+# all source are stored in SRCS-y
+SRCS-y := main.c l3fwd_lpm.c l3fwd_em.c
+
+CFLAGS += -I$(SRCDIR)
+CFLAGS += -O3 $(USER_FLAGS)
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/l3fwd/l3fwd.h b/examples/l3fwd/l3fwd.h
new file mode 100644
index 00000000..d8798b7d
--- /dev/null
+++ b/examples/l3fwd/l3fwd.h
@@ -0,0 +1,241 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3_FWD_H__
+#define __L3_FWD_H__
+
+#include <rte_vect.h>
+
+#define DO_RFC_1812_CHECKS
+
+#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
+
+#if !defined(NO_HASH_MULTI_LOOKUP) && defined(RTE_MACHINE_CPUFLAG_NEON)
+#define NO_HASH_MULTI_LOOKUP 1
+#endif
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+
+/*
+ * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
+ */
+#define MAX_TX_BURST (MAX_PKT_BURST / 2)
+
+#define NB_SOCKETS 8
+
+/* Configure how many packets ahead to prefetch, when reading packets */
+#define PREFETCH_OFFSET 3
+
+/* Used to mark destination port as 'invalid'. */
+#define BAD_PORT ((uint16_t)-1)
+
+#define FWDSTEP 4
+
+/* replace first 12B of the ethernet header. */
+#define MASK_ETH 0x3f
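+/*
+ * Presumably used as an _mm_blend_epi16() mask in the SSE forwarding
+ * path: 0x3f selects the low six 16-bit lanes, i.e. the 12 bytes of
+ * destination + source MAC, while keeping the original EtherType.
+ */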
+
+/* Hash parameters. */
+#ifdef RTE_ARCH_X86_64
+/* default to 4 million hash entries (approx) */
+#define L3FWD_HASH_ENTRIES (1024*1024*4)
+#else
+/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
+#define L3FWD_HASH_ENTRIES (1024*1024*1)
+#endif
+#define HASH_ENTRY_NUMBER_DEFAULT 4
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+} __rte_cache_aligned;
+
+struct lcore_conf {
+ uint16_t n_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+ uint16_t n_tx_port;
+ uint16_t tx_port_id[RTE_MAX_ETHPORTS];
+ uint16_t tx_queue_id[RTE_MAX_ETHPORTS];
+ struct mbuf_table tx_mbufs[RTE_MAX_ETHPORTS];
+ void *ipv4_lookup_struct;
+ void *ipv6_lookup_struct;
+} __rte_cache_aligned;
+
+extern volatile bool force_quit;
+
+/* ethernet addresses of ports */
+extern uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
+extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+extern uint32_t enabled_port_mask;
+
+/* Used only in exact match mode. */
+extern int ipv6; /**< ipv6 is false by default. */
+extern uint32_t hash_entry_number;
+
+extern xmm_t val_eth[RTE_MAX_ETHPORTS];
+
+extern struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+/* Send burst of packets on an output interface */
+static inline int
+send_burst(struct lcore_conf *qconf, uint16_t n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ int ret;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int
+send_single_packet(struct lcore_conf *qconf,
+ struct rte_mbuf *m, uint8_t port)
+{
+ uint16_t len;
+
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif /* DO_RFC_1812_CHECKS */
+
+/* Function pointers for LPM or EM functionality. */
+void
+setup_lpm(const int socketid);
+
+void
+setup_hash(const int socketid);
+
+int
+em_check_ptype(int portid);
+
+int
+lpm_check_ptype(int portid);
+
+uint16_t
+em_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
+
+uint16_t
+lpm_cb_parse_ptype(uint8_t port, uint16_t queue, struct rte_mbuf *pkts[],
+ uint16_t nb_pkts, uint16_t max_pkts, void *user_param);
+
+int
+em_main_loop(__attribute__((unused)) void *dummy);
+
+int
+lpm_main_loop(__attribute__((unused)) void *dummy);
+
+/* Return ipv4/ipv6 fwd lookup struct for LPM or EM. */
+void *
+em_get_ipv4_l3fwd_lookup_struct(const int socketid);
+
+void *
+em_get_ipv6_l3fwd_lookup_struct(const int socketid);
+
+void *
+lpm_get_ipv4_l3fwd_lookup_struct(const int socketid);
+
+void *
+lpm_get_ipv6_l3fwd_lookup_struct(const int socketid);
+
+#endif /* __L3_FWD_H__ */
diff --git a/examples/l3fwd/l3fwd_em.c b/examples/l3fwd/l3fwd_em.c
new file mode 100644
index 00000000..fc59243d
--- /dev/null
+++ b/examples/l3fwd/l3fwd_em.c
@@ -0,0 +1,801 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <netinet/in.h>
+
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_cycles.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_hash.h>
+
+#include "l3fwd.h"
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC rte_jhash
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+#define IPV6_ADDR_LEN 16
+
+struct ipv4_5tuple {
+ uint32_t ip_dst;
+ uint32_t ip_src;
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+union ipv4_5tuple_host {
+ struct {
+ uint8_t pad0;
+ uint8_t proto;
+ uint16_t pad1;
+ uint32_t ip_src;
+ uint32_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ };
+ xmm_t xmm;
+};
+
+#define XMM_NUM_IN_IPV6_5TUPLE 3
+
+struct ipv6_5tuple {
+ uint8_t ip_dst[IPV6_ADDR_LEN];
+ uint8_t ip_src[IPV6_ADDR_LEN];
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+union ipv6_5tuple_host {
+ struct {
+ uint16_t pad0;
+ uint8_t proto;
+ uint8_t pad1;
+ uint8_t ip_src[IPV6_ADDR_LEN];
+ uint8_t ip_dst[IPV6_ADDR_LEN];
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint64_t reserve;
+ };
+ xmm_t xmm[XMM_NUM_IN_IPV6_5TUPLE];
+};
+
+
+struct ipv4_l3fwd_em_route {
+ struct ipv4_5tuple key;
+ uint8_t if_out;
+};
+
+struct ipv6_l3fwd_em_route {
+ struct ipv6_5tuple key;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_em_route ipv4_l3fwd_em_route_array[] = {
+ {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0},
+ {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1},
+ {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2},
+ {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3},
+};
+
+static struct ipv6_l3fwd_em_route ipv6_l3fwd_em_route_array[] = {
+ {{
+ {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
+ 101, 11, IPPROTO_TCP}, 0},
+
+ {{
+ {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
+ 102, 12, IPPROTO_TCP}, 1},
+
+ {{
+ {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
+ 101, 11, IPPROTO_TCP}, 2},
+
+ {{
+ {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38, 0x05},
+ 102, 12, IPPROTO_TCP}, 3},
+};
+
+struct rte_hash *ipv4_l3fwd_em_lookup_struct[NB_SOCKETS];
+struct rte_hash *ipv6_l3fwd_em_lookup_struct[NB_SOCKETS];
+
+static inline uint32_t
+ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
+ uint32_t init_val)
+{
+ const union ipv4_5tuple_host *k;
+ uint32_t t;
+ const uint32_t *p;
+
+ k = data;
+ t = k->proto;
+ p = (const uint32_t *)&k->port_src;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ init_val = rte_hash_crc_4byte(t, init_val);
+ init_val = rte_hash_crc_4byte(k->ip_src, init_val);
+ init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
+ init_val = rte_hash_crc_4byte(*p, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ init_val = rte_jhash_1word(t, init_val);
+ init_val = rte_jhash_1word(k->ip_src, init_val);
+ init_val = rte_jhash_1word(k->ip_dst, init_val);
+ init_val = rte_jhash_1word(*p, init_val);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+ return init_val;
+}
+
+static inline uint32_t
+ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
+ uint32_t init_val)
+{
+ const union ipv6_5tuple_host *k;
+ uint32_t t;
+ const uint32_t *p;
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
+ const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+ k = data;
+ t = k->proto;
+ p = (const uint32_t *)&k->port_src;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ ip_src0 = (const uint32_t *) k->ip_src;
+ ip_src1 = (const uint32_t *)(k->ip_src+4);
+ ip_src2 = (const uint32_t *)(k->ip_src+8);
+ ip_src3 = (const uint32_t *)(k->ip_src+12);
+ ip_dst0 = (const uint32_t *) k->ip_dst;
+ ip_dst1 = (const uint32_t *)(k->ip_dst+4);
+ ip_dst2 = (const uint32_t *)(k->ip_dst+8);
+ ip_dst3 = (const uint32_t *)(k->ip_dst+12);
+ init_val = rte_hash_crc_4byte(t, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src0, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src1, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src2, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src3, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
+ init_val = rte_hash_crc_4byte(*p, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ init_val = rte_jhash_1word(t, init_val);
+ init_val = rte_jhash(k->ip_src,
+ sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
+ init_val = rte_jhash(k->ip_dst,
+ sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
+ init_val = rte_jhash_1word(*p, init_val);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ return init_val;
+}
+
+#define IPV4_L3FWD_EM_NUM_ROUTES \
+ (sizeof(ipv4_l3fwd_em_route_array) / sizeof(ipv4_l3fwd_em_route_array[0]))
+
+#define IPV6_L3FWD_EM_NUM_ROUTES \
+ (sizeof(ipv6_l3fwd_em_route_array) / sizeof(ipv6_l3fwd_em_route_array[0]))
+
+static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+
+static rte_xmm_t mask0;
+static rte_xmm_t mask1;
+static rte_xmm_t mask2;
+
+#if defined(__SSE2__)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+ __m128i data = _mm_loadu_si128((__m128i *)(key));
+
+ return _mm_and_si128(data, mask);
+}
+#elif defined(RTE_MACHINE_CPUFLAG_NEON)
+static inline xmm_t
+em_mask_key(void *key, xmm_t mask)
+{
+ int32x4_t data = vld1q_s32((int32_t *)key);
+
+ return vandq_s32(data, mask);
+}
+#endif
+
+static inline uint8_t
+em_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct)
+{
+ int ret = 0;
+ union ipv4_5tuple_host key;
+ struct rte_hash *ipv4_l3fwd_lookup_struct =
+ (struct rte_hash *)lookup_struct;
+
+ ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
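+ /*
+ * Advancing to time_to_live lets a single 16-byte unaligned load cover
+ * TTL, protocol, checksum, both IP addresses and both L4 ports; mask0
+ * then clears the TTL and checksum bytes so only the 5-tuple remains.
+ */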
+
+ /*
+ * Get 5 tuple: dst port, src port, dst IP address,
+ * src IP address and protocol.
+ */
+ key.xmm = em_mask_key(ipv4_hdr, mask0.x);
+
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
+}
+
+static inline uint8_t
+em_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct)
+{
+ int ret = 0;
+ union ipv6_5tuple_host key;
+ struct rte_hash *ipv6_l3fwd_lookup_struct =
+ (struct rte_hash *)lookup_struct;
+
+ ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
+ void *data0 = ipv6_hdr;
+ void *data1 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t);
+ void *data2 = ((uint8_t *)ipv6_hdr) + sizeof(xmm_t) + sizeof(xmm_t);
+
+ /* Get part of 5 tuple: src IP address lower 96 bits and protocol */
+ key.xmm[0] = em_mask_key(data0, mask1.x);
+
+ /*
+ * Get part of 5 tuple: dst IP address lower 96 bits
+ * and src IP address higher 32 bits.
+ */
+ key.xmm[1] = *(xmm_t *)data1;
+
+ /*
+ * Get part of 5 tuple: dst port and src port
+ * and dst IP address higher 32 bits.
+ */
+ key.xmm[2] = em_mask_key(data2, mask2.x);
+
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
+}
+
+#if defined(__SSE4_1__)
+#if defined(NO_HASH_MULTI_LOOKUP)
+#include "l3fwd_em_sse.h"
+#else
+#include "l3fwd_em_hlm_sse.h"
+#endif
+#else
+#include "l3fwd_em.h"
+#endif
+
+static void
+convert_ipv4_5tuple(struct ipv4_5tuple *key1,
+ union ipv4_5tuple_host *key2)
+{
+ key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
+ key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
+ key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
+ key2->port_src = rte_cpu_to_be_16(key1->port_src);
+ key2->proto = key1->proto;
+ key2->pad0 = 0;
+ key2->pad1 = 0;
+}
+
+static void
+convert_ipv6_5tuple(struct ipv6_5tuple *key1,
+ union ipv6_5tuple_host *key2)
+{
+ uint32_t i;
+
+ for (i = 0; i < 16; i++) {
+ key2->ip_dst[i] = key1->ip_dst[i];
+ key2->ip_src[i] = key1->ip_src[i];
+ }
+ key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
+ key2->port_src = rte_cpu_to_be_16(key1->port_src);
+ key2->proto = key1->proto;
+ key2->pad0 = 0;
+ key2->pad1 = 0;
+ key2->reserve = 0;
+}
+
+#define BYTE_VALUE_MAX 256
+#define ALL_32_BITS 0xffffffff
+#define BIT_8_TO_15 0x0000ff00
+
+static inline void
+populate_ipv4_few_flow_into_table(const struct rte_hash *h)
+{
+ uint32_t i;
+ int32_t ret;
+
+ mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
+
+ for (i = 0; i < IPV4_L3FWD_EM_NUM_ROUTES; i++) {
+ struct ipv4_l3fwd_em_route entry;
+ union ipv4_5tuple_host newkey;
+
+ entry = ipv4_l3fwd_em_route_array[i];
+ convert_ipv4_5tuple(&entry.key, &newkey);
+ ret = rte_hash_add_key(h, (void *) &newkey);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
+ " to the l3fwd hash.\n", i);
+ }
+ ipv4_l3fwd_out_if[ret] = entry.if_out;
+ }
+ printf("Hash: Adding 0x%" PRIx64 " keys\n",
+ (uint64_t)IPV4_L3FWD_EM_NUM_ROUTES);
+}
+
+#define BIT_16_TO_23 0x00ff0000
+static inline void
+populate_ipv6_few_flow_into_table(const struct rte_hash *h)
+{
+ uint32_t i;
+ int32_t ret;
+
+ mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
+
+ mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} };
+
+ for (i = 0; i < IPV6_L3FWD_EM_NUM_ROUTES; i++) {
+ struct ipv6_l3fwd_em_route entry;
+ union ipv6_5tuple_host newkey;
+
+ entry = ipv6_l3fwd_em_route_array[i];
+ convert_ipv6_5tuple(&entry.key, &newkey);
+ ret = rte_hash_add_key(h, (void *) &newkey);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
+ " to the l3fwd hash.\n", i);
+ }
+ ipv6_l3fwd_out_if[ret] = entry.if_out;
+ }
+ printf("Hash: Adding 0x%" PRIx64 "keys\n",
+ (uint64_t)IPV6_L3FWD_EM_NUM_ROUTES);
+}
+
+#define NUMBER_PORT_USED 4
+static inline void
+populate_ipv4_many_flow_into_table(const struct rte_hash *h,
+ unsigned int nr_flow)
+{
+ unsigned i;
+
+ mask0 = (rte_xmm_t){.u32 = {BIT_8_TO_15, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
+
+ for (i = 0; i < nr_flow; i++) {
+ struct ipv4_l3fwd_em_route entry;
+ union ipv4_5tuple_host newkey;
+
+ uint8_t a = (uint8_t)
+ ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX);
+ uint8_t b = (uint8_t)
+ (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX);
+ uint8_t c = (uint8_t)
+ ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX));
+
+ /* Create the ipv4 exact match flow */
+ memset(&entry, 0, sizeof(entry));
+ switch (i & (NUMBER_PORT_USED - 1)) {
+ case 0:
+ entry = ipv4_l3fwd_em_route_array[0];
+ entry.key.ip_dst = IPv4(101, c, b, a);
+ break;
+ case 1:
+ entry = ipv4_l3fwd_em_route_array[1];
+ entry.key.ip_dst = IPv4(201, c, b, a);
+ break;
+ case 2:
+ entry = ipv4_l3fwd_em_route_array[2];
+ entry.key.ip_dst = IPv4(111, c, b, a);
+ break;
+ case 3:
+ entry = ipv4_l3fwd_em_route_array[3];
+ entry.key.ip_dst = IPv4(211, c, b, a);
+ break;
+ }
+ convert_ipv4_5tuple(&entry.key, &newkey);
+ int32_t ret = rte_hash_add_key(h, (void *) &newkey);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
+
+ ipv4_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
+
+ }
+ printf("Hash: Adding 0x%x keys\n", nr_flow);
+}
+
+static inline void
+populate_ipv6_many_flow_into_table(const struct rte_hash *h,
+ unsigned int nr_flow)
+{
+ unsigned i;
+
+ mask1 = (rte_xmm_t){.u32 = {BIT_16_TO_23, ALL_32_BITS,
+ ALL_32_BITS, ALL_32_BITS} };
+ mask2 = (rte_xmm_t){.u32 = {ALL_32_BITS, ALL_32_BITS, 0, 0} };
+
+ for (i = 0; i < nr_flow; i++) {
+ struct ipv6_l3fwd_em_route entry;
+ union ipv6_5tuple_host newkey;
+
+ uint8_t a = (uint8_t)
+ ((i/NUMBER_PORT_USED)%BYTE_VALUE_MAX);
+ uint8_t b = (uint8_t)
+ (((i/NUMBER_PORT_USED)/BYTE_VALUE_MAX)%BYTE_VALUE_MAX);
+ uint8_t c = (uint8_t)
+ ((i/NUMBER_PORT_USED)/(BYTE_VALUE_MAX*BYTE_VALUE_MAX));
+
+ /* Create the ipv6 exact match flow */
+ memset(&entry, 0, sizeof(entry));
+ switch (i & (NUMBER_PORT_USED - 1)) {
+ case 0:
+ entry = ipv6_l3fwd_em_route_array[0];
+ break;
+ case 1:
+ entry = ipv6_l3fwd_em_route_array[1];
+ break;
+ case 2:
+ entry = ipv6_l3fwd_em_route_array[2];
+ break;
+ case 3:
+ entry = ipv6_l3fwd_em_route_array[3];
+ break;
+ }
+ entry.key.ip_dst[13] = c;
+ entry.key.ip_dst[14] = b;
+ entry.key.ip_dst[15] = a;
+ convert_ipv6_5tuple(&entry.key, &newkey);
+ int32_t ret = rte_hash_add_key(h, (void *) &newkey);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
+
+ ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
+
+ }
+ printf("Hash: Adding 0x%x keys\n", nr_flow);
+}
+
+/* Requirements:
+ * 1. IP packets without extension;
+ * 2. L4 payload should be either TCP or UDP.
+ */
+int
+em_check_ptype(int portid)
+{
+ int i, ret;
+ int ptype_l3_ipv4_ext = 0;
+ int ptype_l3_ipv6_ext = 0;
+ int ptype_l4_tcp = 0;
+ int ptype_l4_udp = 0;
+ uint32_t ptype_mask = RTE_PTYPE_L3_MASK | RTE_PTYPE_L4_MASK;
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
+ if (ret <= 0)
+ return 0;
+
+ uint32_t ptypes[ret];
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
+ for (i = 0; i < ret; ++i) {
+ switch (ptypes[i]) {
+ case RTE_PTYPE_L3_IPV4_EXT:
+ ptype_l3_ipv4_ext = 1;
+ break;
+ case RTE_PTYPE_L3_IPV6_EXT:
+ ptype_l3_ipv6_ext = 1;
+ break;
+ case RTE_PTYPE_L4_TCP:
+ ptype_l4_tcp = 1;
+ break;
+ case RTE_PTYPE_L4_UDP:
+ ptype_l4_udp = 1;
+ break;
+ }
+ }
+
+ if (ptype_l3_ipv4_ext == 0)
+ printf("port %d cannot parse RTE_PTYPE_L3_IPV4_EXT\n", portid);
+ if (ptype_l3_ipv6_ext == 0)
+ printf("port %d cannot parse RTE_PTYPE_L3_IPV6_EXT\n", portid);
+ if (!ptype_l3_ipv4_ext || !ptype_l3_ipv6_ext)
+ return 0;
+
+ if (ptype_l4_tcp == 0)
+ printf("port %d cannot parse RTE_PTYPE_L4_TCP\n", portid);
+ if (ptype_l4_udp == 0)
+ printf("port %d cannot parse RTE_PTYPE_L4_UDP\n", portid);
+ if (ptype_l4_tcp && ptype_l4_udp)
+ return 1;
+
+ return 0;
+}
+
+static inline void
+em_parse_ptype(struct rte_mbuf *m)
+{
+ struct ether_hdr *eth_hdr;
+ uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+ uint16_t ether_type;
+ void *l3;
+ int hdr_len;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ ether_type = eth_hdr->ether_type;
+ l3 = (uint8_t *)eth_hdr + sizeof(struct ether_hdr);
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4)) {
+ ipv4_hdr = (struct ipv4_hdr *)l3;
+ hdr_len = (ipv4_hdr->version_ihl & IPV4_HDR_IHL_MASK) *
+ IPV4_IHL_MULTIPLIER;
+ if (hdr_len == sizeof(struct ipv4_hdr)) {
+ packet_type |= RTE_PTYPE_L3_IPV4;
+ if (ipv4_hdr->next_proto_id == IPPROTO_TCP)
+ packet_type |= RTE_PTYPE_L4_TCP;
+ else if (ipv4_hdr->next_proto_id == IPPROTO_UDP)
+ packet_type |= RTE_PTYPE_L4_UDP;
+ } else
+ packet_type |= RTE_PTYPE_L3_IPV4_EXT;
+ } else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6)) {
+ ipv6_hdr = (struct ipv6_hdr *)l3;
+ if (ipv6_hdr->proto == IPPROTO_TCP)
+ packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_TCP;
+ else if (ipv6_hdr->proto == IPPROTO_UDP)
+ packet_type |= RTE_PTYPE_L3_IPV6 | RTE_PTYPE_L4_UDP;
+ else
+ packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+ }
+
+ m->packet_type = packet_type;
+}
+
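+/*
+ * em_cb_parse_ptype() is presumably meant to be registered as an RX
+ * callback (e.g. via rte_eth_add_rx_callback()) for ports whose driver
+ * cannot report packet types in hardware; it fills in m->packet_type in
+ * software before the burst reaches the forwarding code.
+ */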
+uint16_t
+em_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts,
+ uint16_t max_pkts __rte_unused,
+ void *user_param __rte_unused)
+{
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; ++i)
+ em_parse_ptype(pkts[i]);
+
+ return nb_pkts;
+}
+
+/* main processing loop */
+int
+em_main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int i, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+ US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD,
+ " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ lcore_id, portid, queueid);
+ }
+
+ while (!force_quit) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_tx_port; ++i) {
+ portid = qconf->tx_port_id[i];
+ if (qconf->tx_mbufs[portid].len == 0)
+ continue;
+ send_burst(qconf,
+ qconf->tx_mbufs[portid].len,
+ portid);
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+ MAX_PKT_BURST);
+ if (nb_rx == 0)
+ continue;
+
+#if defined(__SSE4_1__)
+ l3fwd_em_send_packets(nb_rx, pkts_burst,
+ portid, qconf);
+#else
+ l3fwd_em_no_opt_send_packets(nb_rx, pkts_burst,
+ portid, qconf);
+#endif /* __SSE4_1__ */
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Initialize exact match (hash) parameters.
+ */
+void
+setup_hash(const int socketid)
+{
+ struct rte_hash_parameters ipv4_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(union ipv4_5tuple_host),
+ .hash_func = ipv4_hash_crc,
+ .hash_func_init_val = 0,
+ };
+
+ struct rte_hash_parameters ipv6_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(union ipv6_5tuple_host),
+ .hash_func = ipv6_hash_crc,
+ .hash_func_init_val = 0,
+ };
+
+ char s[64];
+
+ /* create ipv4 hash */
+ snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+ ipv4_l3fwd_hash_params.name = s;
+ ipv4_l3fwd_hash_params.socket_id = socketid;
+ ipv4_l3fwd_em_lookup_struct[socketid] =
+ rte_hash_create(&ipv4_l3fwd_hash_params);
+ if (ipv4_l3fwd_em_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Unable to create the l3fwd hash on socket %d\n",
+ socketid);
+
+ /* create ipv6 hash */
+ snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+ ipv6_l3fwd_hash_params.name = s;
+ ipv6_l3fwd_hash_params.socket_id = socketid;
+ ipv6_l3fwd_em_lookup_struct[socketid] =
+ rte_hash_create(&ipv6_l3fwd_hash_params);
+ if (ipv6_l3fwd_em_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Unable to create the l3fwd hash on socket %d\n",
+ socketid);
+
+ if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) {
+ /* For testing hash matching with a large number of flows we
+ * generate millions of IP 5-tuples with an incremented dst
+ * address to initialize the hash table. */
+ if (ipv6 == 0) {
+ /* populate the ipv4 hash */
+ populate_ipv4_many_flow_into_table(
+ ipv4_l3fwd_em_lookup_struct[socketid],
+ hash_entry_number);
+ } else {
+ /* populate the ipv6 hash */
+ populate_ipv6_many_flow_into_table(
+ ipv6_l3fwd_em_lookup_struct[socketid],
+ hash_entry_number);
+ }
+ } else {
+ /*
+ * Use data in ipv4/ipv6 l3fwd lookup table
+ * directly to initialize the hash table.
+ */
+ if (ipv6 == 0) {
+ /* populate the ipv4 hash */
+ populate_ipv4_few_flow_into_table(
+ ipv4_l3fwd_em_lookup_struct[socketid]);
+ } else {
+ /* populate the ipv6 hash */
+ populate_ipv6_few_flow_into_table(
+ ipv6_l3fwd_em_lookup_struct[socketid]);
+ }
+ }
+}
+
+/* Return ipv4/ipv6 em fwd lookup struct. */
+void *
+em_get_ipv4_l3fwd_lookup_struct(const int socketid)
+{
+ return ipv4_l3fwd_em_lookup_struct[socketid];
+}
+
+void *
+em_get_ipv6_l3fwd_lookup_struct(const int socketid)
+{
+ return ipv6_l3fwd_em_lookup_struct[socketid];
+}
diff --git a/examples/l3fwd/l3fwd_em.h b/examples/l3fwd/l3fwd_em.h
new file mode 100644
index 00000000..2284bbd5
--- /dev/null
+++ b/examples/l3fwd/l3fwd_em.h
@@ -0,0 +1,138 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3FWD_EM_H__
+#define __L3FWD_EM_H__
+
+static inline __attribute__((always_inline)) void
+l3fwd_em_simple_forward(struct rte_mbuf *m, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ uint8_t dst_port;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ tcp_or_udp = m->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = m->packet_type & RTE_PTYPE_L3_MASK;
+
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
+ /* Handle IPv4 headers.*/
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+#endif
+ dst_port = em_get_ipv4_dst_port(ipv4_hdr, portid,
+ qconf->ipv4_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+#endif
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(qconf, m, dst_port);
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
+ /* Handle IPv6 headers.*/
+ struct ipv6_hdr *ipv6_hdr;
+
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ dst_port = em_get_ipv6_dst_port(ipv6_hdr, portid,
+ qconf->ipv6_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(qconf, m, dst_port);
+ } else {
+ /* Free the mbuf that contains non-IPV4/IPV6 packet */
+ rte_pktmbuf_free(m);
+ }
+}
+
+/*
+ * Buffer non-optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
+
+ /*
+ * Prefetch and forward already prefetched
+ * packets.
+ */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_em_simple_forward(pkts_burst[j], portid, qconf);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++)
+ l3fwd_em_simple_forward(pkts_burst[j], portid, qconf);
+}
+
+#endif /* __L3FWD_EM_H__ */
diff --git a/examples/l3fwd/l3fwd_em_hlm_sse.h b/examples/l3fwd/l3fwd_em_hlm_sse.h
new file mode 100644
index 00000000..5001c724
--- /dev/null
+++ b/examples/l3fwd/l3fwd_em_hlm_sse.h
@@ -0,0 +1,342 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3FWD_EM_HLM_SSE_H__
+#define __L3FWD_EM_HLM_SSE_H__
+
+#include "l3fwd_sse.h"
+
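+/*
+ * Resolve the output port for eight IPv4 packets at once: build the
+ * eight 5-tuple keys with SSE loads plus mask0, then issue a single
+ * rte_hash_lookup_multi() burst lookup instead of eight scalar lookups.
+ */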
+static inline __attribute__((always_inline)) void
+em_get_dst_port_ipv4x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
+ uint8_t portid, uint16_t dst_port[8])
+{
+ int32_t ret[8];
+ union ipv4_5tuple_host key[8];
+ __m128i data[8];
+
+ data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+
+ key[0].xmm = _mm_and_si128(data[0], mask0.x);
+ key[1].xmm = _mm_and_si128(data[1], mask0.x);
+ key[2].xmm = _mm_and_si128(data[2], mask0.x);
+ key[3].xmm = _mm_and_si128(data[3], mask0.x);
+ key[4].xmm = _mm_and_si128(data[4], mask0.x);
+ key[5].xmm = _mm_and_si128(data[5], mask0.x);
+ key[6].xmm = _mm_and_si128(data[6], mask0.x);
+ key[7].xmm = _mm_and_si128(data[7], mask0.x);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
+
+ rte_hash_lookup_multi(qconf->ipv4_lookup_struct, &key_array[0], 8, ret);
+
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ?
+ portid : ipv4_l3fwd_out_if[ret[7]]);
+
+ if (dst_port[0] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[0]) == 0)
+ dst_port[0] = portid;
+
+ if (dst_port[1] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[1]) == 0)
+ dst_port[1] = portid;
+
+ if (dst_port[2] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[2]) == 0)
+ dst_port[2] = portid;
+
+ if (dst_port[3] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[3]) == 0)
+ dst_port[3] = portid;
+
+ if (dst_port[4] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+
+ if (dst_port[5] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+
+ if (dst_port[6] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+
+ if (dst_port[7] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
+
+}
+
+static inline void
+get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
+ __m128i mask1, union ipv6_5tuple_host *key)
+{
+ __m128i tmpdata0 = _mm_loadu_si128(
+ rte_pktmbuf_mtod_offset(m0, __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len)));
+
+ __m128i tmpdata1 = _mm_loadu_si128(
+ rte_pktmbuf_mtod_offset(m0, __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len) +
+ sizeof(__m128i)));
+
+ __m128i tmpdata2 = _mm_loadu_si128(
+ rte_pktmbuf_mtod_offset(m0, __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len) +
+ sizeof(__m128i) + sizeof(__m128i)));
+
+ key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
+ key->xmm[1] = tmpdata1;
+ key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
+}
+
+static inline __attribute__((always_inline)) void
+em_get_dst_port_ipv6x8(struct lcore_conf *qconf, struct rte_mbuf *m[8],
+ uint8_t portid, uint16_t dst_port[8])
+{
+ int32_t ret[8];
+ union ipv6_5tuple_host key[8];
+
+ get_ipv6_5tuple(m[0], mask1.x, mask2.x, &key[0]);
+ get_ipv6_5tuple(m[1], mask1.x, mask2.x, &key[1]);
+ get_ipv6_5tuple(m[2], mask1.x, mask2.x, &key[2]);
+ get_ipv6_5tuple(m[3], mask1.x, mask2.x, &key[3]);
+ get_ipv6_5tuple(m[4], mask1.x, mask2.x, &key[4]);
+ get_ipv6_5tuple(m[5], mask1.x, mask2.x, &key[5]);
+ get_ipv6_5tuple(m[6], mask1.x, mask2.x, &key[6]);
+ get_ipv6_5tuple(m[7], mask1.x, mask2.x, &key[7]);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
+
+ rte_hash_lookup_multi(qconf->ipv6_lookup_struct, &key_array[0], 8, ret);
+
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ?
+ portid : ipv6_l3fwd_out_if[ret[7]]);
+
+ if (dst_port[0] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[0]) == 0)
+ dst_port[0] = portid;
+
+ if (dst_port[1] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[1]) == 0)
+ dst_port[1] = portid;
+
+ if (dst_port[2] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[2]) == 0)
+ dst_port[2] = portid;
+
+ if (dst_port[3] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[3]) == 0)
+ dst_port[3] = portid;
+
+ if (dst_port[4] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+
+ if (dst_port[5] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+
+ if (dst_port[6] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+
+ if (dst_port[7] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
+
+}
+
+static inline __attribute__((always_inline)) uint16_t
+em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
+ uint8_t portid)
+{
+ uint8_t next_hop;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
+
+ tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
+
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
+
+ /* Handle IPv4 headers.*/
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+ next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid,
+ qconf->ipv4_lookup_struct);
+
+ if (next_hop >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << next_hop) == 0)
+ next_hop = portid;
+
+ return next_hop;
+
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
+
+ /* Handle IPv6 headers.*/
+ ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid,
+ qconf->ipv6_lookup_struct);
+
+ if (next_hop >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << next_hop) == 0)
+ next_hop = portid;
+
+ return next_hop;
+
+ }
+
+ return portid;
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+ uint16_t dst_port[MAX_PKT_BURST];
+
+ /*
+ * Send nb_rx - nb_rx%8 packets
+ * in groups of 8.
+ */
+ int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
+
+ for (j = 0; j < n; j += 8) {
+
+ uint32_t pkt_type =
+ pkts_burst[j]->packet_type &
+ pkts_burst[j+1]->packet_type &
+ pkts_burst[j+2]->packet_type &
+ pkts_burst[j+3]->packet_type &
+ pkts_burst[j+4]->packet_type &
+ pkts_burst[j+5]->packet_type &
+ pkts_burst[j+6]->packet_type &
+ pkts_burst[j+7]->packet_type;
+
+ uint32_t l3_type = pkt_type & RTE_PTYPE_L3_MASK;
+ uint32_t tcp_or_udp = pkt_type &
+ (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+
+ if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV4)) {
+
+ em_get_dst_port_ipv4x8(qconf, &pkts_burst[j], portid, &dst_port[j]);
+
+ } else if (tcp_or_udp && (l3_type == RTE_PTYPE_L3_IPV6)) {
+
+ em_get_dst_port_ipv6x8(qconf, &pkts_burst[j], portid, &dst_port[j]);
+
+ } else {
+ dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+ dst_port[j+1] = em_get_dst_port(qconf, pkts_burst[j+1], portid);
+ dst_port[j+2] = em_get_dst_port(qconf, pkts_burst[j+2], portid);
+ dst_port[j+3] = em_get_dst_port(qconf, pkts_burst[j+3], portid);
+ dst_port[j+4] = em_get_dst_port(qconf, pkts_burst[j+4], portid);
+ dst_port[j+5] = em_get_dst_port(qconf, pkts_burst[j+5], portid);
+ dst_port[j+6] = em_get_dst_port(qconf, pkts_burst[j+6], portid);
+ dst_port[j+7] = em_get_dst_port(qconf, pkts_burst[j+7], portid);
+ }
+ }
+
+ for (; j < nb_rx; j++)
+ dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+
+}
+#endif /* __L3FWD_EM_SSE_HLM_H__ */
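The helpers above resolve eight flows at once: the keys are gathered into key_array and handed to a single rte_hash_lookup_multi() call, and every negative position falls back to the RX port. A minimal sketch of that bulk-lookup pattern, with the hash table, out_if map and port values as illustrative placeholders only:

#include <rte_hash.h>

/* Bulk exact-match lookup with per-miss fallback (illustrative sketch). */
static void
bulk_lookup_sketch(const struct rte_hash *h, const void *keys[8],
		const uint8_t out_if[], uint8_t rx_port, uint8_t dst[8])
{
	int32_t pos[8];
	unsigned i;

	/* One call resolves all eight keys; pos[i] < 0 means "not found". */
	rte_hash_lookup_multi(h, keys, 8, pos);

	for (i = 0; i < 8; i++)
		dst[i] = (pos[i] < 0) ? rx_port : out_if[pos[i]];
}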
diff --git a/examples/l3fwd/l3fwd_em_sse.h b/examples/l3fwd/l3fwd_em_sse.h
new file mode 100644
index 00000000..c0a9725a
--- /dev/null
+++ b/examples/l3fwd/l3fwd_em_sse.h
@@ -0,0 +1,112 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3FWD_EM_SSE_H__
+#define __L3FWD_EM_SSE_H__
+
+/**
+ * @file
+ * This is an optional implementation of packet classification in the
+ * Exact-Match path, using a sequential (one packet at a time) method.
+ * While the multi hash lookup usually provides better performance, this
+ * sequential path is disabled by default and can be enabled with the
+ * NO_HASH_LOOKUP_MULTI define at compile time.
+ */
+
+#include "l3fwd_sse.h"
+
+static inline __attribute__((always_inline)) uint16_t
+em_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
+ uint8_t portid)
+{
+ uint8_t next_hop;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ uint32_t tcp_or_udp;
+ uint32_t l3_ptypes;
+
+ tcp_or_udp = pkt->packet_type & (RTE_PTYPE_L4_TCP | RTE_PTYPE_L4_UDP);
+ l3_ptypes = pkt->packet_type & RTE_PTYPE_L3_MASK;
+
+ if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV4)) {
+
+ /* Handle IPv4 headers.*/
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+ next_hop = em_get_ipv4_dst_port(ipv4_hdr, portid,
+ qconf->ipv4_lookup_struct);
+
+ if (next_hop >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << next_hop) == 0)
+ next_hop = portid;
+
+ return next_hop;
+
+ } else if (tcp_or_udp && (l3_ptypes == RTE_PTYPE_L3_IPV6)) {
+
+ /* Handle IPv6 headers.*/
+ ipv6_hdr = rte_pktmbuf_mtod_offset(pkt, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ next_hop = em_get_ipv6_dst_port(ipv6_hdr, portid,
+ qconf->ipv6_lookup_struct);
+
+ if (next_hop >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << next_hop) == 0)
+ next_hop = portid;
+
+ return next_hop;
+
+ }
+
+ return portid;
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_em_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+ uint16_t dst_port[MAX_PKT_BURST];
+
+ for (j = 0; j < nb_rx; j++)
+ dst_port[j] = em_get_dst_port(qconf, pkts_burst[j], portid);
+
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+#endif /* __L3FWD_EM_SSE_H__ */
diff --git a/examples/l3fwd/l3fwd_lpm.c b/examples/l3fwd/l3fwd_lpm.c
new file mode 100644
index 00000000..d941bdfd
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm.c
@@ -0,0 +1,356 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <stdbool.h>
+
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_cycles.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+
+#include "l3fwd.h"
+
+struct ipv4_l3fwd_lpm_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+struct ipv6_l3fwd_lpm_route {
+ uint8_t ip[16];
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_lpm_route ipv4_l3fwd_lpm_route_array[] = {
+ {IPv4(1, 1, 1, 0), 24, 0},
+ {IPv4(2, 1, 1, 0), 24, 1},
+ {IPv4(3, 1, 1, 0), 24, 2},
+ {IPv4(4, 1, 1, 0), 24, 3},
+ {IPv4(5, 1, 1, 0), 24, 4},
+ {IPv4(6, 1, 1, 0), 24, 5},
+ {IPv4(7, 1, 1, 0), 24, 6},
+ {IPv4(8, 1, 1, 0), 24, 7},
+};
+
+static struct ipv6_l3fwd_lpm_route ipv6_l3fwd_lpm_route_array[] = {
+ {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
+ {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
+ {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
+ {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
+ {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
+ {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
+ {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
+ {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
+};
+
+#define IPV4_L3FWD_LPM_NUM_ROUTES \
+ (sizeof(ipv4_l3fwd_lpm_route_array) / sizeof(ipv4_l3fwd_lpm_route_array[0]))
+#define IPV6_L3FWD_LPM_NUM_ROUTES \
+ (sizeof(ipv6_l3fwd_lpm_route_array) / sizeof(ipv6_l3fwd_lpm_route_array[0]))
+
+#define IPV4_L3FWD_LPM_MAX_RULES 1024
+#define IPV4_L3FWD_LPM_NUMBER_TBL8S (1 << 8)
+#define IPV6_L3FWD_LPM_MAX_RULES 1024
+#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
+
+struct rte_lpm *ipv4_l3fwd_lpm_lookup_struct[NB_SOCKETS];
+struct rte_lpm6 *ipv6_l3fwd_lpm_lookup_struct[NB_SOCKETS];
+
+#if defined(__SSE4_1__)
+#include "l3fwd_lpm_sse.h"
+#else
+#include "l3fwd_lpm.h"
+#endif
+
+/* main processing loop */
+int
+lpm_main_loop(__attribute__((unused)) void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int i, nb_rx;
+ uint8_t portid, queueid;
+ struct lcore_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+ US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_conf[lcore_id];
+
+ if (qconf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_queue; i++) {
+
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD,
+ " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ lcore_id, portid, queueid);
+ }
+
+ while (!force_quit) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_tx_port; ++i) {
+ portid = qconf->tx_port_id[i];
+ if (qconf->tx_mbufs[portid].len == 0)
+ continue;
+ send_burst(qconf,
+ qconf->tx_mbufs[portid].len,
+ portid);
+ qconf->tx_mbufs[portid].len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_queue; ++i) {
+ portid = qconf->rx_queue_list[i].port_id;
+ queueid = qconf->rx_queue_list[i].queue_id;
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+ MAX_PKT_BURST);
+ if (nb_rx == 0)
+ continue;
+
+#if defined(__SSE4_1__)
+ l3fwd_lpm_send_packets(nb_rx, pkts_burst,
+ portid, qconf);
+#else
+ l3fwd_lpm_no_opt_send_packets(nb_rx, pkts_burst,
+ portid, qconf);
+#endif /* __SSE4_1__ */
+ }
+ }
+
+ return 0;
+}
+
+void
+setup_lpm(const int socketid)
+{
+ struct rte_lpm6_config config;
+ struct rte_lpm_config config_ipv4;
+ unsigned i;
+ int ret;
+ char s[64];
+
+ /* create the LPM table */
+ config_ipv4.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+ config_ipv4.number_tbl8s = IPV4_L3FWD_LPM_NUMBER_TBL8S;
+ config_ipv4.flags = 0;
+ snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
+ ipv4_l3fwd_lpm_lookup_struct[socketid] =
+ rte_lpm_create(s, socketid, &config_ipv4);
+ if (ipv4_l3fwd_lpm_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Unable to create the l3fwd LPM table on socket %d\n",
+ socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < IPV4_L3FWD_LPM_NUM_ROUTES; i++) {
+
+ /* skip unused ports */
+ if ((1 << ipv4_l3fwd_lpm_route_array[i].if_out &
+ enabled_port_mask) == 0)
+ continue;
+
+ ret = rte_lpm_add(ipv4_l3fwd_lpm_lookup_struct[socketid],
+ ipv4_l3fwd_lpm_route_array[i].ip,
+ ipv4_l3fwd_lpm_route_array[i].depth,
+ ipv4_l3fwd_lpm_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE,
+ "Unable to add entry %u to the l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route 0x%08x / %d (%d)\n",
+ (unsigned)ipv4_l3fwd_lpm_route_array[i].ip,
+ ipv4_l3fwd_lpm_route_array[i].depth,
+ ipv4_l3fwd_lpm_route_array[i].if_out);
+ }
+
+ /* create the LPM6 table */
+ snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
+
+ config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
+ config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
+ config.flags = 0;
+ ipv6_l3fwd_lpm_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
+ &config);
+ if (ipv6_l3fwd_lpm_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Unable to create the l3fwd LPM table on socket %d\n",
+ socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < IPV6_L3FWD_LPM_NUM_ROUTES; i++) {
+
+ /* skip unused ports */
+ if ((1 << ipv6_l3fwd_lpm_route_array[i].if_out &
+ enabled_port_mask) == 0)
+ continue;
+
+ ret = rte_lpm6_add(ipv6_l3fwd_lpm_lookup_struct[socketid],
+ ipv6_l3fwd_lpm_route_array[i].ip,
+ ipv6_l3fwd_lpm_route_array[i].depth,
+ ipv6_l3fwd_lpm_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE,
+ "Unable to add entry %u to the l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route %s / %d (%d)\n",
+ "IPV6",
+ ipv6_l3fwd_lpm_route_array[i].depth,
+ ipv6_l3fwd_lpm_route_array[i].if_out);
+ }
+}
+
+int
+lpm_check_ptype(int portid)
+{
+ int i, ret;
+ int ptype_l3_ipv4 = 0, ptype_l3_ipv6 = 0;
+ uint32_t ptype_mask = RTE_PTYPE_L3_MASK;
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, NULL, 0);
+ if (ret <= 0)
+ return 0;
+
+ uint32_t ptypes[ret];
+
+ ret = rte_eth_dev_get_supported_ptypes(portid, ptype_mask, ptypes, ret);
+ for (i = 0; i < ret; ++i) {
+ if (ptypes[i] & RTE_PTYPE_L3_IPV4)
+ ptype_l3_ipv4 = 1;
+ if (ptypes[i] & RTE_PTYPE_L3_IPV6)
+ ptype_l3_ipv6 = 1;
+ }
+
+ if (ptype_l3_ipv4 == 0)
+ printf("port %d cannot parse RTE_PTYPE_L3_IPV4\n", portid);
+
+ if (ptype_l3_ipv6 == 0)
+ printf("port %d cannot parse RTE_PTYPE_L3_IPV6\n", portid);
+
+ if (ptype_l3_ipv4 && ptype_l3_ipv6)
+ return 1;
+
+ return 0;
+
+}
+
+static inline void
+lpm_parse_ptype(struct rte_mbuf *m)
+{
+ struct ether_hdr *eth_hdr;
+ uint32_t packet_type = RTE_PTYPE_UNKNOWN;
+ uint16_t ether_type;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ ether_type = eth_hdr->ether_type;
+ if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv4))
+ packet_type |= RTE_PTYPE_L3_IPV4_EXT_UNKNOWN;
+ else if (ether_type == rte_cpu_to_be_16(ETHER_TYPE_IPv6))
+ packet_type |= RTE_PTYPE_L3_IPV6_EXT_UNKNOWN;
+
+ m->packet_type = packet_type;
+}
+
+uint16_t
+lpm_cb_parse_ptype(uint8_t port __rte_unused, uint16_t queue __rte_unused,
+ struct rte_mbuf *pkts[], uint16_t nb_pkts,
+ uint16_t max_pkts __rte_unused,
+ void *user_param __rte_unused)
+{
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; ++i)
+ lpm_parse_ptype(pkts[i]);
+
+ return nb_pkts;
+}
+
+/* Return ipv4/ipv6 lpm fwd lookup struct. */
+void *
+lpm_get_ipv4_l3fwd_lookup_struct(const int socketid)
+{
+ return ipv4_l3fwd_lpm_lookup_struct[socketid];
+}
+
+void *
+lpm_get_ipv6_l3fwd_lookup_struct(const int socketid)
+{
+ return ipv6_l3fwd_lpm_lookup_struct[socketid];
+}
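setup_lpm() above follows the usual rte_lpm life cycle: fill in a config, create the table on the target socket, add routes, and later resolve next hops with rte_lpm_lookup(). A condensed sketch of that cycle, assuming the rte_lpm API as used in this file and illustrative sizes, with the first route from the array above:

#include <rte_ip.h>
#include <rte_lpm.h>

/* Create an LPM table, add one /24 route, look an address up (sketch only). */
static int
lpm_cycle_sketch(int socketid)
{
	struct rte_lpm_config cfg = {
		.max_rules = 1024,
		.number_tbl8s = 1 << 8,
		.flags = 0,
	};
	struct rte_lpm *lpm = rte_lpm_create("SKETCH_LPM", socketid, &cfg);
	uint32_t next_hop;

	if (lpm == NULL)
		return -1;

	/* 1.1.1.0/24 -> port 0, mirroring ipv4_l3fwd_lpm_route_array[0]. */
	if (rte_lpm_add(lpm, IPv4(1, 1, 1, 0), 24, 0) < 0)
		return -1;

	/* rte_lpm_lookup() returns 0 on a hit and fills in the stored next hop. */
	if (rte_lpm_lookup(lpm, IPv4(1, 1, 1, 10), &next_hop) == 0)
		return (int)next_hop;

	return -1;
}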
diff --git a/examples/l3fwd/l3fwd_lpm.h b/examples/l3fwd/l3fwd_lpm.h
new file mode 100644
index 00000000..a43c5070
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm.h
@@ -0,0 +1,151 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3FWD_LPM_H__
+#define __L3FWD_LPM_H__
+
+static inline uint8_t
+lpm_get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid, void *lookup_struct)
+{
+ uint32_t next_hop;
+ struct rte_lpm *ipv4_l3fwd_lookup_struct =
+ (struct rte_lpm *)lookup_struct;
+
+ return (uint8_t) ((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
+ rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
+ &next_hop) == 0) ? next_hop : portid);
+}
+
+static inline uint8_t
+lpm_get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid, void *lookup_struct)
+{
+ uint8_t next_hop;
+ struct rte_lpm6 *ipv6_l3fwd_lookup_struct =
+ (struct rte_lpm6 *)lookup_struct;
+
+ return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
+ ((struct ipv6_hdr *)ipv6_hdr)->dst_addr,
+ &next_hop) == 0) ? next_hop : portid);
+}
+
+static inline __attribute__((always_inline)) void
+l3fwd_lpm_simple_forward(struct rte_mbuf *m, uint8_t portid,
+ struct lcore_conf *qconf)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ uint8_t dst_port;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ /* Handle IPv4 headers.*/
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+#endif
+ dst_port = lpm_get_ipv4_dst_port(ipv4_hdr, portid,
+ qconf->ipv4_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+#endif
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(qconf, m, dst_port);
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ /* Handle IPv6 headers.*/
+ struct ipv6_hdr *ipv6_hdr;
+
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ dst_port = lpm_get_ipv6_dst_port(ipv6_hdr, portid,
+ qconf->ipv6_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(qconf, m, dst_port);
+ } else {
+ /* Free the mbuf that contains non-IPV4/IPV6 packet */
+ rte_pktmbuf_free(m);
+ }
+}
+
+static inline void
+l3fwd_lpm_no_opt_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
+
+ /* Prefetch and forward already prefetched packets. */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_lpm_simple_forward(pkts_burst[j], portid, qconf);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++)
+ l3fwd_lpm_simple_forward(pkts_burst[j], portid, qconf);
+}
+
+#endif /* __L3FWD_LPM_H__ */
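l3fwd_lpm_no_opt_send_packets() above is a standard software prefetch pipeline: PREFETCH_OFFSET packets are prefetched up front, then each iteration prefetches one packet ahead while forwarding a packet that is already in cache. The same pattern in stripped-down form, where handle_pkt() is a hypothetical stand-in for l3fwd_lpm_simple_forward():

#include <rte_mbuf.h>
#include <rte_prefetch.h>

/* Hypothetical stand-in for the per-packet forwarding step. */
static inline void handle_pkt(struct rte_mbuf *m) { rte_pktmbuf_free(m); }

/* Prefetch pipeline over a burst of mbufs (sketch). */
static void
prefetch_pipeline_sketch(struct rte_mbuf **burst, int nb, int offset)
{
	int j;

	/* Warm up: prefetch the first 'offset' packets. */
	for (j = 0; j < offset && j < nb; j++)
		rte_prefetch0(rte_pktmbuf_mtod(burst[j], void *));

	/* Steady state: prefetch one ahead, forward one already prefetched. */
	for (j = 0; j < nb - offset; j++) {
		rte_prefetch0(rte_pktmbuf_mtod(burst[j + offset], void *));
		handle_pkt(burst[j]);
	}

	/* Tail: the remaining packets were prefetched by the loops above. */
	for (; j < nb; j++)
		handle_pkt(burst[j]);
}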
diff --git a/examples/l3fwd/l3fwd_lpm_sse.h b/examples/l3fwd/l3fwd_lpm_sse.h
new file mode 100644
index 00000000..538fe3d7
--- /dev/null
+++ b/examples/l3fwd/l3fwd_lpm_sse.h
@@ -0,0 +1,213 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __L3FWD_LPM_SSE_H__
+#define __L3FWD_LPM_SSE_H__
+
+#include "l3fwd_sse.h"
+
+static inline __attribute__((always_inline)) uint16_t
+lpm_get_dst_port(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
+ uint8_t portid)
+{
+ uint32_t next_hop_ipv4;
+ uint8_t next_hop_ipv6;
+ struct ipv6_hdr *ipv6_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ether_hdr *eth_hdr;
+
+ if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+
+ return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct,
+ rte_be_to_cpu_32(ipv4_hdr->dst_addr), &next_hop_ipv4) == 0) ?
+ next_hop_ipv4 : portid);
+
+ } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+ ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+
+ return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct,
+ ipv6_hdr->dst_addr, &next_hop_ipv6) == 0)
+ ? next_hop_ipv6 : portid);
+
+ }
+
+ return portid;
+}
+
+/*
+ * lpm_get_dst_port optimized routine for packets where dst_ipv4 has already
+ * been extracted. If the packet is IPv6, dst_addr is taken directly from the
+ * packet header and the dst_ipv4 value is not used.
+ */
+static inline __attribute__((always_inline)) uint16_t
+lpm_get_dst_port_with_ipv4(const struct lcore_conf *qconf, struct rte_mbuf *pkt,
+ uint32_t dst_ipv4, uint8_t portid)
+{
+ uint32_t next_hop_ipv4;
+ uint8_t next_hop_ipv6;
+ struct ipv6_hdr *ipv6_hdr;
+ struct ether_hdr *eth_hdr;
+
+ if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+ return (uint16_t) ((rte_lpm_lookup(qconf->ipv4_lookup_struct, dst_ipv4,
+ &next_hop_ipv4) == 0) ? next_hop_ipv4 : portid);
+
+ } else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+ ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+
+ return (uint16_t) ((rte_lpm6_lookup(qconf->ipv6_lookup_struct,
+ ipv6_hdr->dst_addr, &next_hop_ipv6) == 0)
+ ? next_hop_ipv6 : portid);
+
+ }
+
+ return portid;
+
+}
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+ __m128i *dip,
+ uint32_t *ipv4_flag)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ether_hdr *eth_hdr;
+ uint32_t x0, x1, x2, x3;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x0 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x1 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[1]->packet_type;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x2 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[2]->packet_type;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x3 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[3]->packet_type;
+
+ dip[0] = _mm_set_epi32(x3, x2, x1, x0);
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ */
+static inline void
+processx4_step2(const struct lcore_conf *qconf,
+ __m128i dip,
+ uint32_t ipv4_flag,
+ uint8_t portid,
+ struct rte_mbuf *pkt[FWDSTEP],
+ uint16_t dprt[FWDSTEP])
+{
+ rte_xmm_t dst;
+ const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
+ 4, 5, 6, 7, 0, 1, 2, 3);
+
+ /* Byte swap 4 IPV4 addresses. */
+ dip = _mm_shuffle_epi8(dip, bswap_mask);
+
+ /* if all 4 packets are IPV4. */
+ if (likely(ipv4_flag)) {
+ rte_lpm_lookupx4(qconf->ipv4_lookup_struct, dip, dst.u32,
+ portid);
+ /* get rid of unused upper 16 bit for each dport. */
+ dst.x = _mm_packs_epi32(dst.x, dst.x);
+ *(uint64_t *)dprt = dst.u64[0];
+ } else {
+ dst.x = dip;
+ dprt[0] = lpm_get_dst_port_with_ipv4(qconf, pkt[0], dst.u32[0], portid);
+ dprt[1] = lpm_get_dst_port_with_ipv4(qconf, pkt[1], dst.u32[1], portid);
+ dprt[2] = lpm_get_dst_port_with_ipv4(qconf, pkt[2], dst.u32[2], portid);
+ dprt[3] = lpm_get_dst_port_with_ipv4(qconf, pkt[3], dst.u32[3], portid);
+ }
+}
+
+/*
+ * Buffer optimized handling of packets, invoked
+ * from main_loop.
+ */
+static inline void
+l3fwd_lpm_send_packets(int nb_rx, struct rte_mbuf **pkts_burst,
+ uint8_t portid, struct lcore_conf *qconf)
+{
+ int32_t j;
+ uint16_t dst_port[MAX_PKT_BURST];
+ __m128i dip[MAX_PKT_BURST / FWDSTEP];
+ uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+ const int32_t k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+ &ipv4_flag[j / FWDSTEP]);
+
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step2(qconf, dip[j / FWDSTEP],
+ ipv4_flag[j / FWDSTEP], portid, &pkts_burst[j], &dst_port[j]);
+
+ /* Classify last up to 3 packets one by one */
+ switch (nb_rx % FWDSTEP) {
+ case 3:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ case 2:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ case 1:
+ dst_port[j] = lpm_get_dst_port(qconf, pkts_burst[j], portid);
+ j++;
+ }
+
+ send_packets_multi(qconf, pkts_burst, dst_port, nb_rx);
+}
+
+#endif /* __L3FWD_LPM_SSE_H__ */
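processx4_step2() above converts four big-endian destination addresses to host order with a single _mm_shuffle_epi8 before the vector LPM lookup; the mask simply reverses the byte order inside each 32-bit lane. A small standalone check of that mask (SSSE3 assumed, in line with the SSE4.1 requirement of the surrounding code; the addresses are illustrative):

#include <stdint.h>
#include <stdio.h>
#include <tmmintrin.h>

int
main(void)
{
	const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
						4, 5, 6, 7, 0, 1, 2, 3);
	/* 10.1.1.1 .. 10.1.1.4 as they sit in the packet (network byte order). */
	__m128i dip = _mm_set_epi32(0x0401010a, 0x0301010a, 0x0201010a, 0x0101010a);
	uint32_t out[4];

	dip = _mm_shuffle_epi8(dip, bswap_mask);
	_mm_storeu_si128((__m128i *)out, dip);

	/* Expect 0x0a010101 .. 0x0a010104, i.e. the same addresses in host order. */
	printf("%08x %08x %08x %08x\n", out[0], out[1], out[2], out[3]);
	return 0;
}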
diff --git a/examples/l3fwd/l3fwd_sse.h b/examples/l3fwd/l3fwd_sse.h
new file mode 100644
index 00000000..1afa1f00
--- /dev/null
+++ b/examples/l3fwd/l3fwd_sse.h
@@ -0,0 +1,501 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef _L3FWD_COMMON_H_
+#define _L3FWD_COMMON_H_
+
+#include "l3fwd.h"
+
+#ifdef DO_RFC_1812_CHECKS
+
+#define IPV4_MIN_VER_IHL 0x45
+#define IPV4_MAX_VER_IHL 0x4f
+#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
+
+/* Minimum value of IPV4 total length (20B) in network byte order. */
+#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8)
+
+/*
+ * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
+ * - The IP version number must be 4.
+ * - The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ * - The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ * If we encounter an invalid IPV4 packet, then set its destination port
+ * to the BAD_PORT value.
+ */
+static inline __attribute__((always_inline)) void
+rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint16_t *dp, uint32_t ptype)
+{
+ uint8_t ihl;
+
+ if (RTE_ETH_IS_IPV4_HDR(ptype)) {
+ ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;
+
+ ipv4_hdr->time_to_live--;
+ ipv4_hdr->hdr_checksum++;
+
+ if (ihl > IPV4_MAX_VER_IHL_DIFF ||
+ ((uint8_t)ipv4_hdr->total_length == 0 &&
+ ipv4_hdr->total_length < IPV4_MIN_LEN_BE))
+ dp[0] = BAD_PORT;
+
+ }
+}
+
+#else
+#define rfc1812_process(mb, dp, ptype) do { } while (0)
+#endif /* DO_RFC_1812_CHECKS */
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint16_t dst_port[FWDSTEP])
+{
+ __m128i te[FWDSTEP];
+ __m128i ve[FWDSTEP];
+ __m128i *p[FWDSTEP];
+
+ p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
+ p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
+ p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
+ p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);
+
+ ve[0] = val_eth[dst_port[0]];
+ te[0] = _mm_loadu_si128(p[0]);
+
+ ve[1] = val_eth[dst_port[1]];
+ te[1] = _mm_loadu_si128(p[1]);
+
+ ve[2] = val_eth[dst_port[2]];
+ te[2] = _mm_loadu_si128(p[2]);
+
+ ve[3] = val_eth[dst_port[3]];
+ te[3] = _mm_loadu_si128(p[3]);
+
+ /* Update first 12 bytes, keep rest bytes intact. */
+ te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH);
+ te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH);
+ te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH);
+ te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH);
+
+ _mm_storeu_si128(p[0], te[0]);
+ _mm_storeu_si128(p[1], te[1]);
+ _mm_storeu_si128(p[2], te[2]);
+ _mm_storeu_si128(p[3], te[3]);
+
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+ &dst_port[0], pkt[0]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+ &dst_port[1], pkt[1]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+ &dst_port[2], pkt[2]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+ &dst_port[3], pkt[3]->packet_type);
+}
+
+/*
+ * We group consecutive packets with the same destination port into one burst.
+ * To avoid extra latency this is done together with some other packet
+ * processing, but after we have made a final decision about each packet's destination.
+ * To do this we maintain:
+ * pnum - array of number of consecutive packets with the same dest port for
+ * each packet in the input burst.
+ * lp - pointer to the last updated element in the pnum.
+ * dlp - dest port value lp corresponds to.
+ */
+
+#define GRPSZ (1 << FWDSTEP)
+#define GRPMSK (GRPSZ - 1)
+
+#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
+ if (likely((dlp) == (dcp)[(idx)])) { \
+ (lp)[0]++; \
+ } else { \
+ (dlp) = (dcp)[idx]; \
+ (lp) = (pn) + (idx); \
+ (lp)[0] = 1; \
+ } \
+} while (0)
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have an array of destination ports:
+ * dst_port[] = {a, b, c, d, e, ... }
+ * dp1 should contain: <a, b, c, d>, dp2: <b, c, d, e>.
+ * We do 4 comparisons at once and the result is a 4-bit mask.
+ * This mask is used as an index into a prebuilt array of pnum values.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
+{
+ static const struct {
+ uint64_t pnum; /* prebuilt 4 values for pnum[]. */
+ int32_t idx; /* index of the new last updated element. */
+ uint16_t lpv; /* add value to the last updated element. */
+ } gptbl[GRPSZ] = {
+ {
+ /* 0: a != b, b != c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100010001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 1: a == b, b != c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100010002),
+ .idx = 4,
+ .lpv = 1,
+ },
+ {
+ /* 2: a != b, b == c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100020001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 3: a == b, b == c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100020003),
+ .idx = 4,
+ .lpv = 2,
+ },
+ {
+ /* 4: a != b, b != c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200010001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 5: a == b, b != c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200010002),
+ .idx = 4,
+ .lpv = 1,
+ },
+ {
+ /* 6: a != b, b == c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200030001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 7: a == b, b == c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200030004),
+ .idx = 4,
+ .lpv = 3,
+ },
+ {
+ /* 8: a != b, b != c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100010001),
+ .idx = 3,
+ .lpv = 0,
+ },
+ {
+ /* 9: a == b, b != c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100010002),
+ .idx = 3,
+ .lpv = 1,
+ },
+ {
+ /* 0xa: a != b, b == c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100020001),
+ .idx = 3,
+ .lpv = 0,
+ },
+ {
+ /* 0xb: a == b, b == c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100020003),
+ .idx = 3,
+ .lpv = 2,
+ },
+ {
+ /* 0xc: a != b, b != c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300010001),
+ .idx = 2,
+ .lpv = 0,
+ },
+ {
+ /* 0xd: a == b, b != c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300010002),
+ .idx = 2,
+ .lpv = 1,
+ },
+ {
+ /* 0xe: a != b, b == c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300040001),
+ .idx = 1,
+ .lpv = 0,
+ },
+ {
+ /* 0xf: a == b, b == c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300040005),
+ .idx = 0,
+ .lpv = 4,
+ },
+ };
+
+ union {
+ uint16_t u16[FWDSTEP + 1];
+ uint64_t u64;
+ } *pnum = (void *)pn;
+
+ int32_t v;
+
+ dp1 = _mm_cmpeq_epi16(dp1, dp2);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ v = _mm_movemask_ps((__m128)dp1);
+
+ /* update last port counter. */
+ lp[0] += gptbl[v].lpv;
+
+ /* if dest port value has changed. */
+ if (v != GRPMSK) {
+ pnum->u64 = gptbl[v].pnum;
+ pnum->u16[FWDSTEP] = 1;
+ lp = pnum->u16 + gptbl[v].idx;
+ }
+
+ return lp;
+}
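port_groupx4() above reduces the four neighbour comparisons to a 4-bit mask: bit i is set when dst_port[j+i] equals dst_port[j+i+1], and the mask indexes gptbl[] for the precomputed run lengths. A standalone check of the mask computation for dst_port = {5, 5, 5, 7, 7}, which should select entry 0xb ("a == b, b == c, c != d, d == e"):

#include <stdint.h>
#include <stdio.h>
#include <emmintrin.h>

int
main(void)
{
	/* dp1 = <5, 5, 5, 7>, dp2 = <5, 5, 7, 7>; upper lanes are unused. */
	__m128i dp1 = _mm_setr_epi16(5, 5, 5, 7, 0, 0, 0, 0);
	__m128i dp2 = _mm_setr_epi16(5, 5, 7, 7, 0, 0, 0, 0);
	int32_t v;

	dp1 = _mm_cmpeq_epi16(dp1, dp2);
	dp1 = _mm_unpacklo_epi16(dp1, dp1);
	v = _mm_movemask_ps((__m128)dp1);

	/* Expect v == 0xb: equal, equal, not equal, equal. */
	printf("v = 0x%x\n", v);
	return 0;
}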
+
+/**
+ * Process one packet:
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+process_packet(struct rte_mbuf *pkt, uint16_t *dst_port)
+{
+ struct ether_hdr *eth_hdr;
+ __m128i te, ve;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+ te = _mm_loadu_si128((__m128i *)eth_hdr);
+ ve = val_eth[dst_port[0]];
+
+ rfc1812_process((struct ipv4_hdr *)(eth_hdr + 1), dst_port,
+ pkt->packet_type);
+
+ te = _mm_blend_epi16(te, ve, MASK_ETH);
+ _mm_storeu_si128((__m128i *)eth_hdr, te);
+}
+
+static inline __attribute__((always_inline)) void
+send_packetsx4(struct lcore_conf *qconf, uint8_t port, struct rte_mbuf *m[],
+ uint32_t num)
+{
+ uint32_t len, j, n;
+
+ len = qconf->tx_mbufs[port].len;
+
+ /*
+ * If the TX buffer for that queue is empty and we have enough packets,
+ * then send them straight away.
+ */
+ if (num >= MAX_TX_BURST && len == 0) {
+ n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
+ if (unlikely(n < num)) {
+ do {
+ rte_pktmbuf_free(m[n]);
+ } while (++n < num);
+ }
+ return;
+ }
+
+ /*
+ * Put packets into TX buffer for that queue.
+ */
+
+ n = len + num;
+ n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
+
+ j = 0;
+ switch (n % FWDSTEP) {
+ while (j < n) {
+ case 0:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 3:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 2:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 1:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ }
+ }
+
+ len += n;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+
+ send_burst(qconf, MAX_PKT_BURST, port);
+
+ /* copy rest of the packets into the TX buffer. */
+ len = num - n;
+ j = 0;
+ switch (len % FWDSTEP) {
+ while (j < len) {
+ case 0:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 3:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 2:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 1:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ }
+ }
+ }
+
+ qconf->tx_mbufs[port].len = len;
+}
+
+/**
+ * Send a burst of packets from pkts_burst to the ports in the dst_port array
+ */
+static inline __attribute__((always_inline)) void
+send_packets_multi(struct lcore_conf *qconf, struct rte_mbuf **pkts_burst,
+ uint16_t dst_port[MAX_PKT_BURST], int nb_rx)
+{
+ int32_t k;
+ int j = 0;
+ uint16_t dlp;
+ uint16_t *lp;
+ uint16_t pnum[MAX_PKT_BURST + 1];
+
+ /*
+ * Finish packet processing and group consecutive
+ * packets with the same destination port.
+ */
+ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+ if (k != 0) {
+ __m128i dp1, dp2;
+
+ lp = pnum;
+ lp[0] = 1;
+
+ processx4_step3(pkts_burst, dst_port);
+
+ /* dp1: <d[0], d[1], d[2], d[3], ... > */
+ dp1 = _mm_loadu_si128((__m128i *)dst_port);
+
+ for (j = FWDSTEP; j != k; j += FWDSTEP) {
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+ /*
+ * dp2:
+ * <d[j-3], d[j-2], d[j-1], d[j], ... >
+ */
+ dp2 = _mm_loadu_si128((__m128i *)
+ &dst_port[j - FWDSTEP + 1]);
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * dp1:
+ * <d[j], d[j+1], d[j+2], d[j+3], ... >
+ */
+ dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) *
+ sizeof(dst_port[0]));
+ }
+
+ /*
+ * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+ */
+ dp2 = _mm_shufflelo_epi16(dp1, 0xf9);
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * remove values added by the last repeated
+ * dst port.
+ */
+ lp[0]--;
+ dlp = dst_port[j - 1];
+ } else {
+ /* set dlp and lp to the never used values. */
+ dlp = BAD_PORT - 1;
+ lp = pnum + MAX_PKT_BURST;
+ }
+
+ /* Process up to last 3 packets one by one. */
+ switch (nb_rx % FWDSTEP) {
+ case 3:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ case 2:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ case 1:
+ process_packet(pkts_burst[j], dst_port + j);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ }
+
+ /*
+ * Send packets out, through destination port.
+ * Consecutive packets with the same destination port
+ * are already grouped together.
+ * If destination port for the packet equals BAD_PORT,
+ * then free the packet without sending it out.
+ */
+ for (j = 0; j < nb_rx; j += k) {
+
+ int32_t m;
+ uint16_t pn;
+
+ pn = dst_port[j];
+ k = pnum[j];
+
+ if (likely(pn != BAD_PORT))
+ send_packetsx4(qconf, pn, pkts_burst + j, k);
+ else
+ for (m = j; m != j + k; m++)
+ rte_pktmbuf_free(pkts_burst[m]);
+
+ }
+}
+
+#endif /* _L3FWD_COMMON_H_ */
diff --git a/examples/l3fwd/main.c b/examples/l3fwd/main.c
new file mode 100644
index 00000000..bf6d8856
--- /dev/null
+++ b/examples/l3fwd/main.c
@@ -0,0 +1,1055 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <signal.h>
+#include <stdbool.h>
+
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+#include <rte_cpuflags.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include "l3fwd.h"
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_LCORE_PARAMS 1024
+
+/* Static global variables used within this file. */
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/**< Promiscuous mode is disabled by default. */
+static int promiscuous_on;
+
+/* Select Longest-Prefix or Exact match. */
+static int l3fwd_lpm_on;
+static int l3fwd_em_on;
+
+static int numa_on = 1; /**< NUMA is enabled by default. */
+static int parse_ptype; /**< Parse packet type using rx callback, and */
+ /**< disabled by default */
+
+/* Global variables. */
+
+volatile bool force_quit;
+
+/* ethernet addresses of ports */
+uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
+struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+xmm_t val_eth[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+uint32_t enabled_port_mask;
+
+/* Used only in exact match mode. */
+int ipv6; /**< ipv6 is false by default. */
+uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
+
+struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+
+struct lcore_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+} __rte_cache_aligned;
+
+static struct lcore_params lcore_params_array[MAX_LCORE_PARAMS];
+static struct lcore_params lcore_params_array_default[] = {
+ {0, 0, 2},
+ {0, 1, 2},
+ {0, 2, 2},
+ {1, 0, 2},
+ {1, 1, 2},
+ {1, 2, 2},
+ {2, 0, 2},
+ {3, 0, 3},
+ {3, 1, 3},
+};
+
+static struct lcore_params * lcore_params = lcore_params_array_default;
+static uint16_t nb_lcore_params = sizeof(lcore_params_array_default) /
+ sizeof(lcore_params_array_default[0]);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool * pktmbuf_pool[NB_SOCKETS];
+
+struct l3fwd_lkp_mode {
+ void (*setup)(int);
+ int (*check_ptype)(int);
+ rte_rx_callback_fn cb_parse_ptype;
+ int (*main_loop)(void *);
+ void* (*get_ipv4_lookup_struct)(int);
+ void* (*get_ipv6_lookup_struct)(int);
+};
+
+static struct l3fwd_lkp_mode l3fwd_lkp;
+
+static struct l3fwd_lkp_mode l3fwd_em_lkp = {
+ .setup = setup_hash,
+ .check_ptype = em_check_ptype,
+ .cb_parse_ptype = em_cb_parse_ptype,
+ .main_loop = em_main_loop,
+ .get_ipv4_lookup_struct = em_get_ipv4_l3fwd_lookup_struct,
+ .get_ipv6_lookup_struct = em_get_ipv6_l3fwd_lookup_struct,
+};
+
+static struct l3fwd_lkp_mode l3fwd_lpm_lkp = {
+ .setup = setup_lpm,
+ .check_ptype = lpm_check_ptype,
+ .cb_parse_ptype = lpm_cb_parse_ptype,
+ .main_loop = lpm_main_loop,
+ .get_ipv4_lookup_struct = lpm_get_ipv4_l3fwd_lookup_struct,
+ .get_ipv6_lookup_struct = lpm_get_ipv6_l3fwd_lookup_struct,
+};
+
+/*
+ * Setup lookup methods for forwarding.
+ * Currently exact-match and longest-prefix-match
+ * are the supported ones.
+ */
+static void
+setup_l3fwd_lookup_tables(void)
+{
+ /* Setup HASH lookup functions. */
+ if (l3fwd_em_on)
+ l3fwd_lkp = l3fwd_em_lkp;
+ /* Setup LPM lookup functions. */
+ else
+ l3fwd_lkp = l3fwd_lpm_lkp;
+}
+
+static int
+check_lcore_params(void)
+{
+ uint8_t queue, lcore;
+ uint16_t i;
+ int socketid;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ queue = lcore_params[i].queue_id;
+ if (queue >= MAX_RX_QUEUE_PER_PORT) {
+ printf("invalid queue number: %hhu\n", queue);
+ return -1;
+ }
+ lcore = lcore_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
+ return -1;
+ }
+ if ((socketid = rte_lcore_to_socket_id(lcore)) != 0 &&
+ (numa_on == 0)) {
+ printf("warning: lcore %hhu is on socket %d with numa off\n",
+ lcore, socketid);
+ }
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ portid = lcore_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ if (lcore_params[i].port_id == port) {
+ if (lcore_params[i].queue_id == queue+1)
+ queue = lcore_params[i].queue_id;
+ else
+ rte_exit(EXIT_FAILURE, "queue ids of the port %d must be"
+ " in sequence and must start with 0\n",
+ lcore_params[i].port_id);
+ }
+ }
+ return (uint8_t)(++queue);
+}
+
+static int
+init_lcore_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t lcore;
+
+ for (i = 0; i < nb_lcore_params; ++i) {
+ lcore = lcore_params[i].lcore_id;
+ nb_rx_queue = lcore_conf[lcore].n_rx_queue;
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for lcore: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)lcore);
+ return -1;
+ } else {
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].port_id =
+ lcore_params[i].port_id;
+ lcore_conf[lcore].rx_queue_list[nb_rx_queue].queue_id =
+ lcore_params[i].queue_id;
+ lcore_conf[lcore].n_rx_queue++;
+ }
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf ("%s [EAL options] -- -p PORTMASK -P"
+ " [--config (port,queue,lcore)[,(port,queue,lcore]]"
+ " [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -P : enable promiscuous mode\n"
+ " -E : enable exact match\n"
+ " -L : enable longest prefix match\n"
+ " --config (port,queue,lcore): rx queues configuration\n"
+ " --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
+ " --no-numa: optional, disable numa awareness\n"
+ " --ipv6: optional, specify it if running ipv6 packets\n"
+ " --enable-jumbo: enable jumbo frame"
+ " which max packet len is PKTLEN in decimal (64-9600)\n"
+ " --hash-entry-num: specify the hash entry number in hexadecimal to be setup\n",
+ prgname);
+}
+
+static int
+parse_max_pkt_len(const char *pktlen)
+{
+ char *end = NULL;
+ unsigned long len;
+
+ /* parse decimal string */
+ len = strtoul(pktlen, &end, 10);
+ if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ return len;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_hash_entry_number(const char *hash_entry_num)
+{
+ char *end = NULL;
+ unsigned long hash_en;
+ /* parse hexadecimal string */
+ hash_en = strtoul(hash_entry_num, &end, 16);
+ if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (hash_en == 0)
+ return -1;
+
+ return hash_en;
+}
+
+static int
+parse_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_lcore_params = 0;
+
+ while ((p = strchr(p0,'(')) != NULL) {
+ ++p;
+ if((p0 = strchr(p,')')) == NULL)
+ return -1;
+
+ size = p0 - p;
+ if(size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++){
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_lcore_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of lcore params: %hu\n",
+ nb_lcore_params);
+ return -1;
+ }
+ lcore_params_array[nb_lcore_params].port_id =
+ (uint8_t)int_fld[FLD_PORT];
+ lcore_params_array[nb_lcore_params].queue_id =
+ (uint8_t)int_fld[FLD_QUEUE];
+ lcore_params_array[nb_lcore_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ ++nb_lcore_params;
+ }
+ lcore_params = lcore_params_array;
+ return 0;
+}
+
+static void
+parse_eth_dest(const char *optarg)
+{
+ uint8_t portid;
+ char *port_end;
+ uint8_t c, *dest, peer_addr[6];
+
+ errno = 0;
+ portid = strtoul(optarg, &port_end, 10);
+ if (errno != 0 || port_end == optarg || *port_end++ != ',')
+ rte_exit(EXIT_FAILURE,
+ "Invalid eth-dest: %s", optarg);
+ if (portid >= RTE_MAX_ETHPORTS)
+ rte_exit(EXIT_FAILURE,
+ "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n",
+ portid, RTE_MAX_ETHPORTS);
+
+ if (cmdline_parse_etheraddr(NULL, port_end,
+ &peer_addr, sizeof(peer_addr)) < 0)
+ rte_exit(EXIT_FAILURE,
+ "Invalid ethernet address: %s\n",
+ port_end);
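+	/* Store the parsed MAC both in dest_eth_addr[] and in the per-port val_eth word used by the forwarding path when rewriting the Ethernet header. */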
+ dest = (uint8_t *)&dest_eth_addr[portid];
+ for (c = 0; c < 6; c++)
+ dest[c] = peer_addr[c];
+ *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
+}
+
+#define MAX_JUMBO_PKT_LEN 9600
+#define MEMPOOL_CACHE_SIZE 256
+
+#define CMD_LINE_OPT_CONFIG "config"
+#define CMD_LINE_OPT_ETH_DEST "eth-dest"
+#define CMD_LINE_OPT_NO_NUMA "no-numa"
+#define CMD_LINE_OPT_IPV6 "ipv6"
+#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
+#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
+#define CMD_LINE_OPT_PARSE_PTYPE "parse-ptype"
+
+/*
+ * This expression is used to calculate the number of mbufs needed
+ * depending on user input, taking into account memory for rx and
+ * tx hardware rings, cache per lcore and mtable per port per lcore.
+ * RTE_MAX is used to ensure that NB_MBUF never goes below a minimum
+ * value of 8192
+ */
+#define NB_MBUF RTE_MAX( \
+ (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
+ nb_ports*nb_lcores*MAX_PKT_BURST + \
+ nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
+ nb_lcores*MEMPOOL_CACHE_SIZE), \
+ (unsigned)8192)
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {CMD_LINE_OPT_CONFIG, 1, 0, 0},
+ {CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
+ {CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
+ {CMD_LINE_OPT_IPV6, 0, 0, 0},
+ {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
+ {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
+ {CMD_LINE_OPT_PARSE_PTYPE, 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ /* Error or normal output strings. */
+ const char *str1 = "L3FWD: Invalid portmask";
+ const char *str2 = "L3FWD: Promiscuous mode selected";
+ const char *str3 = "L3FWD: Exact match selected";
+ const char *str4 = "L3FWD: Longest-prefix match selected";
+ const char *str5 = "L3FWD: Invalid config";
+ const char *str6 = "L3FWD: NUMA is disabled";
+ const char *str7 = "L3FWD: IPV6 is specified";
+ const char *str8 =
+ "L3FWD: Jumbo frame is enabled - disabling simple TX path";
+ const char *str9 = "L3FWD: Invalid packet length";
+ const char *str10 = "L3FWD: Set jumbo frame max packet len to ";
+ const char *str11 = "L3FWD: Invalid hash entry number";
+ const char *str12 =
+ "L3FWD: LPM and EM are mutually exclusive, select only one";
+ const char *str13 = "L3FWD: LPM or EM none selected, default LPM on";
+
+ while ((opt = getopt_long(argc, argvopt, "p:PLE",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("%s\n", str1);
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'P':
+ printf("%s\n", str2);
+ promiscuous_on = 1;
+ break;
+
+ case 'E':
+ printf("%s\n", str3);
+ l3fwd_em_on = 1;
+ break;
+
+ case 'L':
+ printf("%s\n", str4);
+ l3fwd_lpm_on = 1;
+ break;
+
+ /* long options */
+ case 0:
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_CONFIG,
+ sizeof(CMD_LINE_OPT_CONFIG))) {
+
+ ret = parse_config(optarg);
+ if (ret) {
+ printf("%s\n", str5);
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_ETH_DEST,
+ sizeof(CMD_LINE_OPT_ETH_DEST))) {
+ parse_eth_dest(optarg);
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_NO_NUMA,
+ sizeof(CMD_LINE_OPT_NO_NUMA))) {
+ printf("%s\n", str6);
+ numa_on = 0;
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_IPV6,
+ sizeof(CMD_LINE_OPT_IPV6))) {
+ printf("%sn", str7);
+ ipv6 = 1;
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_ENABLE_JUMBO,
+ sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
+ struct option lenopts = {
+ "max-pkt-len", required_argument, 0, 0
+ };
+
+ printf("%s\n", str8);
+ port_conf.rxmode.jumbo_frame = 1;
+
+ /*
+ * if no max-pkt-len set, use the default
+ * value ETHER_MAX_LEN.
+ */
+ if (0 == getopt_long(argc, argvopt, "",
+ &lenopts, &option_index)) {
+ ret = parse_max_pkt_len(optarg);
+ if ((ret < 64) ||
+ (ret > MAX_JUMBO_PKT_LEN)) {
+ printf("%s\n", str9);
+ print_usage(prgname);
+ return -1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = ret;
+ }
+ printf("%s %u\n", str10,
+ (unsigned int)port_conf.rxmode.max_rx_pkt_len);
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_HASH_ENTRY_NUM,
+ sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
+
+ ret = parse_hash_entry_number(optarg);
+ if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
+ hash_entry_number = ret;
+ } else {
+ printf("%s\n", str11);
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name,
+ CMD_LINE_OPT_PARSE_PTYPE,
+ sizeof(CMD_LINE_OPT_PARSE_PTYPE))) {
+ printf("soft parse-ptype is enabled\n");
+ parse_ptype = 1;
+ }
+
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ /* If both LPM and EM are selected, return error. */
+ if (l3fwd_lpm_on && l3fwd_em_on) {
+ printf("%s\n", str12);
+ return -1;
+ }
+
+ /*
+ * Nothing is selected, pick longest-prefix match
+ * as default match.
+ */
+ if (!l3fwd_lpm_on && !l3fwd_em_on) {
+ l3fwd_lpm_on = 1;
+ printf("%s\n", str13);
+ }
+
+	/*
+	 * The ipv6 and hash flags are valid only for
+	 * exact match; reset them to their defaults for
+	 * longest-prefix match.
+	 */
+ if (l3fwd_lpm_on) {
+ ipv6 = 0;
+ hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
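+/* Create one mbuf pool per NUMA socket in use and attach the per-lcore lookup structures (LPM or hash). */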
+static int
+init_mem(unsigned nb_mbuf)
+{
+ struct lcore_conf *qconf;
+ int socketid;
+ unsigned lcore_id;
+ char s[64];
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ if (socketid >= NB_SOCKETS) {
+ rte_exit(EXIT_FAILURE,
+ "Socket %d of lcore %u is out of range %d\n",
+ socketid, lcore_id, NB_SOCKETS);
+ }
+
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] =
+ rte_pktmbuf_pool_create(s, nb_mbuf,
+ MEMPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+ if (pktmbuf_pool[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Cannot init mbuf pool on socket %d\n",
+ socketid);
+ else
+ printf("Allocated mbuf pool on socket %d\n",
+ socketid);
+
+			/* Set up either LPM or EM (hash) lookup. */
+ l3fwd_lkp.setup(socketid);
+ }
+ qconf = &lcore_conf[lcore_id];
+ qconf->ipv4_lookup_struct =
+ l3fwd_lkp.get_ipv4_lookup_struct(socketid);
+ qconf->ipv6_lookup_struct =
+ l3fwd_lkp.get_ipv6_lookup_struct(socketid);
+ }
+ return 0;
+}
+
+/* Check the link status of all ports for up to 9 s, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ if (force_quit)
+ return;
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if (force_quit)
+ return;
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+static void
+signal_handler(int signum)
+{
+ if (signum == SIGINT || signum == SIGTERM) {
+ printf("\n\nSignal %d received, preparing to exit...\n",
+ signum);
+ force_quit = true;
+ }
+}
+
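+/* Returns 1 when packet type parsing is available (reported by the device or provided by the software callback), 0 otherwise. */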
+static int
+prepare_ptype_parser(uint8_t portid, uint16_t queueid)
+{
+ if (parse_ptype) {
+ printf("Port %d: softly parse packet type info\n", portid);
+ if (rte_eth_add_rx_callback(portid, queueid,
+ l3fwd_lkp.cb_parse_ptype,
+ NULL))
+ return 1;
+
+ printf("Failed to add rx callback: port=%d\n", portid);
+ return 0;
+ }
+
+ if (l3fwd_lkp.check_ptype(portid))
+ return 1;
+
+ printf("port %d cannot parse packet type, please add --%s\n",
+ portid, CMD_LINE_OPT_PARSE_PTYPE);
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid, nb_rx_queue, queue, socketid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ force_quit = false;
+ signal(SIGINT, signal_handler);
+ signal(SIGTERM, signal_handler);
+
+ /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ dest_eth_addr[portid] =
+ ETHER_LOCAL_ADMIN_ADDR + ((uint64_t)portid << 40);
+ *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
+ }
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
+
+ if (check_lcore_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+ ret = init_lcore_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_lcore_rx_queues failed\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_exit(EXIT_FAILURE, "check_port_config failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* Setup function pointers for lookup method. */
+ setup_l3fwd_lookup_tables();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid );
+ fflush(stdout);
+
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)n_tx_queue );
+ ret = rte_eth_dev_configure(portid, nb_rx_queue,
+ (uint16_t)n_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+ print_ethaddr("Destination:",
+ (const struct ether_addr *)&dest_eth_addr[portid]);
+ printf(", ");
+
+ /*
+ * prepare src MACs for each port.
+ */
+ ether_addr_copy(&ports_eth_addr[portid],
+ (struct ether_addr *)(val_eth + portid) + 1);
+
+ /* init memory */
+ ret = init_mem(NB_MBUF);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_mem failed\n");
+
+		/* init one TX queue per (lcore, port) pair */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid =
+ (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (port_conf.rxmode.jumbo_frame)
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socketid, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ qconf = &lcore_conf[lcore_id];
+ qconf->tx_queue_id[portid] = queueid;
+ queueid++;
+
+ qconf->tx_port_id[qconf->n_tx_port] = portid;
+ qconf->n_tx_port++;
+ }
+ printf("\n");
+ }
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ qconf = &lcore_conf[lcore_id];
+ printf("\nInitializing rx queues on lcore %u ... ", lcore_id );
+ fflush(stdout);
+ /* init RX queues */
+ for(queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+
+ if (numa_on)
+ socketid =
+ (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid,
+ NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_rx_queue_setup: err=%d, port=%d\n",
+ ret, portid);
+ }
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ continue;
+ }
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE,
+ "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ printf("\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+ qconf = &lcore_conf[lcore_id];
+ for (queue = 0; queue < qconf->n_rx_queue; ++queue) {
+ portid = qconf->rx_queue_list[queue].port_id;
+ queueid = qconf->rx_queue_list[queue].queue_id;
+ if (prepare_ptype_parser(portid, queueid) == 0)
+ rte_exit(EXIT_FAILURE, "ptype check fails\n");
+ }
+ }
+
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ ret = 0;
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(l3fwd_lkp.main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0) {
+ ret = -1;
+ break;
+ }
+ }
+
+ /* stop ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("Closing port %d...", portid);
+ rte_eth_dev_stop(portid);
+ rte_eth_dev_close(portid);
+ printf(" Done\n");
+ }
+ printf("Bye...\n");
+
+ return ret;
+}
diff --git a/examples/link_status_interrupt/Makefile b/examples/link_status_interrupt/Makefile
new file mode 100644
index 00000000..9ecc7fc4
--- /dev/null
+++ b/examples/link_status_interrupt/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = link_status_interrupt
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/link_status_interrupt/main.c b/examples/link_status_interrupt/main.c
new file mode 100644
index 00000000..99815989
--- /dev/null
+++ b/examples/link_status_interrupt/main.c
@@ -0,0 +1,732 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+
+#define RTE_LOGTYPE_LSI RTE_LOGTYPE_USER1
+
+#define NB_MBUF 8192
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr lsi_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t lsi_enabled_port_mask = 0;
+
+static unsigned int lsi_rx_queue_per_lcore = 1;
+
+/* destination port for L2 forwarding */
+static unsigned lsi_dst_ports[RTE_MAX_ETHPORTS] = {0};
+
+#define MAX_PKT_BURST 32
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+ unsigned tx_queue_id;
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+		.hw_strip_crc   = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .intr_conf = {
+ .lsc = 1, /**< lsc interrupt feature enabled */
+ },
+};
+
+struct rte_mempool * lsi_pktmbuf_pool = NULL;
+
+/* Per-port statistics struct */
+struct lsi_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+struct lsi_port_statistics port_statistics[RTE_MAX_ETHPORTS];
+
+/* A tsc-based timer responsible for triggering statistics printout */
+#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 GHz */
+#define MAX_TIMER_PERIOD 86400 /* 1 day max */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ struct rte_eth_link link;
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip ports that are not enabled */
+ if ((lsi_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait((uint8_t)portid, &link);
+ printf("\nStatistics for port %u ------------------------------"
+ "\nLink status: %25s"
+ "\nLink speed: %26u"
+ "\nLink duplex: %25s"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ (link.link_status ? "Link up" : "Link down"),
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX ? \
+ "full-duplex" : "half-duplex"),
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+ printf("\n====================================================\n");
+}
+
+static void
+lsi_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ unsigned dst_port = lsi_dst_ports[portid];
+ int sent;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
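+	/* Note: the 64-bit store below also clears the first two bytes of the source MAC; ether_addr_copy() below then rewrites the full source address. */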
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&lsi_ports_eth_addr[dst_port], &eth->s_addr);
+
+ buffer = tx_buffer[dst_port];
+ sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+ if (sent)
+ port_statistics[dst_port].tx += sent;
+}
+
+/* main processing loop */
+static void
+lsi_main_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ unsigned lcore_id;
+ unsigned sent;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
+ BURST_TX_DRAIN_US;
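+	/* drain_tsc: number of TSC cycles in BURST_TX_DRAIN_US microseconds */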
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ prev_tsc = 0;
+ timer_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, LSI, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, LSI, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, LSI, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = lsi_dst_ports[qconf->rx_port_list[i]];
+ buffer = tx_buffer[portid];
+
+ sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
+ if (sent)
+ port_statistics[portid].tx += sent;
+
+ }
+
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
+
+ /* do this only on master core */
+ if (lcore_id == rte_get_master_lcore()) {
+ print_stats();
+ /* reset the timer */
+ timer_tsc = 0;
+ }
+ }
+ }
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ port_statistics[portid].rx += nb_rx;
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ lsi_simple_forward(m, portid);
+ }
+ }
+ }
+}
+
+static int
+lsi_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ lsi_main_loop();
+ return 0;
+}
+
+/* display usage */
+static void
+lsi_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK [-q NQ]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static int
+lsi_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static unsigned int
+lsi_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse hexadecimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+lsi_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+lsi_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:T:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ lsi_enabled_port_mask = lsi_parse_portmask(optarg);
+ if (lsi_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ lsi_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* nqueue */
+ case 'q':
+ lsi_rx_queue_per_lcore = lsi_parse_nqueue(optarg);
+ if (lsi_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ lsi_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = lsi_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND;
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ lsi_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* long options */
+ case 0:
+ lsi_usage(prgname);
+ return -1;
+
+ default:
+ lsi_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/**
+ * Called as the callback for the specified port after an LSI interrupt
+ * has been fully handled. This callback needs to be implemented carefully,
+ * as it runs in the interrupt host thread, which is different from the
+ * main application thread.
+ *
+ * @param port_id
+ * Port id.
+ * @param type
+ * event type.
+ * @param param
+ *  Pointer to (address of) the parameters.
+ *
+ * @return
+ * void.
+ */
+static void
+lsi_event_callback(uint8_t port_id, enum rte_eth_event_type type, void *param)
+{
+ struct rte_eth_link link;
+
+ RTE_SET_USED(param);
+
+ printf("\n\nIn registered callback...\n");
+ printf("Event type: %s\n", type == RTE_ETH_EVENT_INTR_LSC ? "LSC interrupt" : "unknown event");
+ rte_eth_link_get_nowait(port_id, &link);
+ if (link.link_status) {
+ printf("Port %d Link Up - speed %u Mbps - %s\n\n",
+ port_id, (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex"));
+ } else
+ printf("Port %d Link Down\n\n", port_id);
+}
+
+/* Check the link status of all ports for up to 9 s, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ int ret;
+ uint8_t nb_ports;
+ uint8_t portid, portid_last = 0;
+ unsigned lcore_id, rx_lcore_id;
+ unsigned nb_ports_in_mask = 0;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eal_init failed");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = lsi_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid arguments");
+
+ /* create the mbuf pool */
+ lsi_pktmbuf_pool =
+ rte_pktmbuf_pool_create("mbuf_pool", NB_MBUF, 32, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (lsi_pktmbuf_pool == NULL)
+ rte_panic("Cannot init mbuf pool\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_panic("No Ethernet port - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+	/*
+	 * Compute the destination port for each enabled port:
+	 * packets received on one port of a pair are forwarded
+	 * to the other port of the pair.
+	 */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((lsi_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* save the destination port id */
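+		/* Every second enabled port closes a pair with the previously seen enabled port. */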
+ if (nb_ports_in_mask % 2) {
+ lsi_dst_ports[portid] = portid_last;
+ lsi_dst_ports[portid_last] = portid;
+ }
+ else
+ portid_last = portid;
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask < 2 || nb_ports_in_mask % 2)
+ rte_exit(EXIT_FAILURE, "Current enabled port number is %u, "
+ "but it should be even and at least 2\n",
+ nb_ports_in_mask);
+
+ rx_lcore_id = 0;
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((lsi_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ lsi_rx_queue_per_lcore) {
+
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->n_rx_port++;
+ printf("Lcore %u: RX port %u\n",rx_lcore_id, (unsigned) portid);
+ }
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((lsi_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n", (unsigned) portid);
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+		/* Register the LSI interrupt callback; this must be done after
+		 * rte_eth_dev_configure(). If intr_conf.lsc == 0, no LSC
+		 * interrupts will be generated and the callback registered
+		 * below will never be called.
+		 */
+ rte_eth_dev_callback_register(portid,
+ RTE_ETH_EVENT_INTR_LSC, lsi_event_callback, NULL);
+
+ rte_eth_macaddr_get(portid,
+ &lsi_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ lsi_pktmbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+		/* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d,port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Initialize TX buffers */
+ tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
+ rte_eth_tx_buffer_count_callback,
+ &port_statistics[portid].dropped);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+ printf("done:\n");
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ lsi_ports_eth_addr[portid].addr_bytes[0],
+ lsi_ports_eth_addr[portid].addr_bytes[1],
+ lsi_ports_eth_addr[portid].addr_bytes[2],
+ lsi_ports_eth_addr[portid].addr_bytes[3],
+ lsi_ports_eth_addr[portid].addr_bytes[4],
+ lsi_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* initialize port stats */
+ memset(&port_statistics, 0, sizeof(port_statistics));
+ }
+
+ check_all_ports_link_status(nb_ports, lsi_enabled_port_mask);
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(lsi_launch_one_lcore, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/load_balancer/Makefile b/examples/load_balancer/Makefile
new file mode 100644
index 00000000..2c5fd9b0
--- /dev/null
+++ b/examples/load_balancer/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overriden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = load_balancer
+
+# all source are stored in SRCS-y
+SRCS-y := main.c config.c init.c runtime.c
+
+CFLAGS += -O3 -g
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS_config.o := -D_GNU_SOURCE
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/load_balancer/config.c b/examples/load_balancer/config.c
new file mode 100644
index 00000000..3f6ddee5
--- /dev/null
+++ b/examples/load_balancer/config.c
@@ -0,0 +1,1063 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_lpm.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+
+struct app_params app;
+
+static const char usage[] =
+" \n"
+" load_balancer <EAL PARAMS> -- <APP PARAMS> \n"
+" \n"
+"Application manadatory parameters: \n"
+" --rx \"(PORT, QUEUE, LCORE), ...\" : List of NIC RX ports and queues \n"
+" handled by the I/O RX lcores \n"
+" --tx \"(PORT, LCORE), ...\" : List of NIC TX ports handled by the I/O TX \n"
+" lcores \n"
+" --w \"LCORE, ...\" : List of the worker lcores \n"
+" --lpm \"IP / PREFIX => PORT; ...\" : List of LPM rules used by the worker \n"
+" lcores for packet forwarding \n"
+" \n"
+"Application optional parameters: \n"
+" --rsz \"A, B, C, D\" : Ring sizes \n"
+" A = Size (in number of buffer descriptors) of each of the NIC RX \n"
+" rings read by the I/O RX lcores (default value is %u) \n"
+" B = Size (in number of elements) of each of the SW rings used by the\n"
+" I/O RX lcores to send packets to worker lcores (default value is\n"
+" %u) \n"
+" C = Size (in number of elements) of each of the SW rings used by the\n"
+" worker lcores to send packets to I/O TX lcores (default value is\n"
+" %u) \n"
+" D = Size (in number of buffer descriptors) of each of the NIC TX \n"
+" rings written by I/O TX lcores (default value is %u) \n"
+" --bsz \"(A, B), (C, D), (E, F)\" : Burst sizes \n"
+" A = I/O RX lcore read burst size from NIC RX (default value is %u) \n"
+" B = I/O RX lcore write burst size to output SW rings (default value \n"
+" is %u) \n"
+" C = Worker lcore read burst size from input SW rings (default value \n"
+" is %u) \n"
+" D = Worker lcore write burst size to output SW rings (default value \n"
+" is %u) \n"
+" E = I/O TX lcore read burst size from input SW rings (default value \n"
+" is %u) \n"
+" F = I/O TX lcore write burst size to NIC TX (default value is %u) \n"
+" --pos-lb POS : Position of the 1-byte field within the input packet used by\n"
+" the I/O RX lcores to identify the worker lcore for the current \n"
+" packet (default value is %u) \n";
+
+void
+app_print_usage(void)
+{
+ printf(usage,
+ APP_DEFAULT_NIC_RX_RING_SIZE,
+ APP_DEFAULT_RING_RX_SIZE,
+ APP_DEFAULT_RING_TX_SIZE,
+ APP_DEFAULT_NIC_TX_RING_SIZE,
+ APP_DEFAULT_BURST_SIZE_IO_RX_READ,
+ APP_DEFAULT_BURST_SIZE_IO_RX_WRITE,
+ APP_DEFAULT_BURST_SIZE_WORKER_READ,
+ APP_DEFAULT_BURST_SIZE_WORKER_WRITE,
+ APP_DEFAULT_BURST_SIZE_IO_TX_READ,
+ APP_DEFAULT_BURST_SIZE_IO_TX_WRITE,
+ APP_DEFAULT_IO_RX_LB_POS
+ );
+}
+
+#ifndef APP_ARG_RX_MAX_CHARS
+#define APP_ARG_RX_MAX_CHARS 4096
+#endif
+
+#ifndef APP_ARG_RX_MAX_TUPLES
+#define APP_ARG_RX_MAX_TUPLES 128
+#endif
+
+static int
+str_to_unsigned_array(
+ const char *s, size_t sbuflen,
+ char separator,
+ unsigned num_vals,
+ unsigned *vals)
+{
+ char str[sbuflen+1];
+ char *splits[num_vals];
+ char *endptr = NULL;
+ int i, num_splits = 0;
+
+ /* copy s so we don't modify original string */
+ snprintf(str, sizeof(str), "%s", s);
+ num_splits = rte_strsplit(str, sizeof(str), splits, num_vals, separator);
+
+ errno = 0;
+ for (i = 0; i < num_splits; i++) {
+ vals[i] = strtoul(splits[i], &endptr, 0);
+ if (errno != 0 || *endptr != '\0')
+ return -1;
+ }
+
+ return num_splits;
+}
+
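+/* Parse up to num_vals separator-delimited unsigned values from s and store them through the unsigned* pointers passed as variadic arguments; returns the number of values actually parsed. */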
+static int
+str_to_unsigned_vals(
+ const char *s,
+ size_t sbuflen,
+ char separator,
+ unsigned num_vals, ...)
+{
+ unsigned i, vals[num_vals];
+ va_list ap;
+
+ num_vals = str_to_unsigned_array(s, sbuflen, separator, num_vals, vals);
+
+ va_start(ap, num_vals);
+ for (i = 0; i < num_vals; i++) {
+ unsigned *u = va_arg(ap, unsigned *);
+ *u = vals[i];
+ }
+ va_end(ap);
+ return num_vals;
+}
+
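+/* Parse the --rx "(PORT, QUEUE, LCORE), ..." list: mark each NIC RX queue as used and assign it to its I/O RX lcore. */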
+static int
+parse_arg_rx(const char *arg)
+{
+ const char *p0 = arg, *p = arg;
+ uint32_t n_tuples;
+
+ if (strnlen(arg, APP_ARG_RX_MAX_CHARS + 1) == APP_ARG_RX_MAX_CHARS + 1) {
+ return -1;
+ }
+
+ n_tuples = 0;
+ while ((p = strchr(p0,'(')) != NULL) {
+ struct app_lcore_params *lp;
+ uint32_t port, queue, lcore, i;
+
+ p0 = strchr(p++, ')');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, ',', 3, &port, &queue, &lcore) != 3)) {
+ return -2;
+ }
+
+ /* Enable port and queue for later initialization */
+ if ((port >= APP_MAX_NIC_PORTS) || (queue >= APP_MAX_RX_QUEUES_PER_NIC_PORT)) {
+ return -3;
+ }
+ if (app.nic_rx_queue_mask[port][queue] != 0) {
+ return -4;
+ }
+ app.nic_rx_queue_mask[port][queue] = 1;
+
+ /* Check and assign (port, queue) to I/O lcore */
+ if (rte_lcore_is_enabled(lcore) == 0) {
+ return -5;
+ }
+
+ if (lcore >= APP_MAX_LCORES) {
+ return -6;
+ }
+ lp = &app.lcore_params[lcore];
+ if (lp->type == e_APP_LCORE_WORKER) {
+ return -7;
+ }
+ lp->type = e_APP_LCORE_IO;
+ const size_t n_queues = RTE_MIN(lp->io.rx.n_nic_queues,
+ RTE_DIM(lp->io.rx.nic_queues));
+ for (i = 0; i < n_queues; i ++) {
+ if ((lp->io.rx.nic_queues[i].port == port) &&
+ (lp->io.rx.nic_queues[i].queue == queue)) {
+ return -8;
+ }
+ }
+ if (lp->io.rx.n_nic_queues >= APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE) {
+ return -9;
+ }
+ lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].port = (uint8_t) port;
+ lp->io.rx.nic_queues[lp->io.rx.n_nic_queues].queue = (uint8_t) queue;
+ lp->io.rx.n_nic_queues ++;
+
+ n_tuples ++;
+ if (n_tuples > APP_ARG_RX_MAX_TUPLES) {
+ return -10;
+ }
+ }
+
+ if (n_tuples == 0) {
+ return -11;
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_TX_MAX_CHARS
+#define APP_ARG_TX_MAX_CHARS 4096
+#endif
+
+#ifndef APP_ARG_TX_MAX_TUPLES
+#define APP_ARG_TX_MAX_TUPLES 128
+#endif
+
+static int
+parse_arg_tx(const char *arg)
+{
+ const char *p0 = arg, *p = arg;
+ uint32_t n_tuples;
+
+ if (strnlen(arg, APP_ARG_TX_MAX_CHARS + 1) == APP_ARG_TX_MAX_CHARS + 1) {
+ return -1;
+ }
+
+ n_tuples = 0;
+ while ((p = strchr(p0,'(')) != NULL) {
+ struct app_lcore_params *lp;
+ uint32_t port, lcore, i;
+
+ p0 = strchr(p++, ')');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, ',', 2, &port, &lcore) != 2)) {
+ return -2;
+ }
+
+ /* Enable port and queue for later initialization */
+ if (port >= APP_MAX_NIC_PORTS) {
+ return -3;
+ }
+ if (app.nic_tx_port_mask[port] != 0) {
+ return -4;
+ }
+ app.nic_tx_port_mask[port] = 1;
+
+ /* Check and assign (port, queue) to I/O lcore */
+ if (rte_lcore_is_enabled(lcore) == 0) {
+ return -5;
+ }
+
+ if (lcore >= APP_MAX_LCORES) {
+ return -6;
+ }
+ lp = &app.lcore_params[lcore];
+ if (lp->type == e_APP_LCORE_WORKER) {
+ return -7;
+ }
+ lp->type = e_APP_LCORE_IO;
+ const size_t n_ports = RTE_MIN(lp->io.tx.n_nic_ports,
+ RTE_DIM(lp->io.tx.nic_ports));
+ for (i = 0; i < n_ports; i ++) {
+ if (lp->io.tx.nic_ports[i] == port) {
+ return -8;
+ }
+ }
+ if (lp->io.tx.n_nic_ports >= APP_MAX_NIC_TX_PORTS_PER_IO_LCORE) {
+ return -9;
+ }
+ lp->io.tx.nic_ports[lp->io.tx.n_nic_ports] = (uint8_t) port;
+ lp->io.tx.n_nic_ports ++;
+
+ n_tuples ++;
+ if (n_tuples > APP_ARG_TX_MAX_TUPLES) {
+ return -10;
+ }
+ }
+
+ if (n_tuples == 0) {
+ return -11;
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_W_MAX_CHARS
+#define APP_ARG_W_MAX_CHARS 4096
+#endif
+
+#ifndef APP_ARG_W_MAX_TUPLES
+#define APP_ARG_W_MAX_TUPLES APP_MAX_WORKER_LCORES
+#endif
+
+static int
+parse_arg_w(const char *arg)
+{
+ const char *p = arg;
+ uint32_t n_tuples;
+
+ if (strnlen(arg, APP_ARG_W_MAX_CHARS + 1) == APP_ARG_W_MAX_CHARS + 1) {
+ return -1;
+ }
+
+ n_tuples = 0;
+ while (*p != 0) {
+ struct app_lcore_params *lp;
+ uint32_t lcore;
+
+ errno = 0;
+ lcore = strtoul(p, NULL, 0);
+ if ((errno != 0)) {
+ return -2;
+ }
+
+ /* Check and enable worker lcore */
+ if (rte_lcore_is_enabled(lcore) == 0) {
+ return -3;
+ }
+
+ if (lcore >= APP_MAX_LCORES) {
+ return -4;
+ }
+ lp = &app.lcore_params[lcore];
+ if (lp->type == e_APP_LCORE_IO) {
+ return -5;
+ }
+ lp->type = e_APP_LCORE_WORKER;
+
+ n_tuples ++;
+ if (n_tuples > APP_ARG_W_MAX_TUPLES) {
+ return -6;
+ }
+
+ p = strchr(p, ',');
+ if (p == NULL) {
+ break;
+ }
+ p ++;
+ }
+
+ if (n_tuples == 0) {
+ return -7;
+ }
+
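+	/* The number of worker lcores must be a power of 2. */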
+ if ((n_tuples & (n_tuples - 1)) != 0) {
+ return -8;
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_LPM_MAX_CHARS
+#define APP_ARG_LPM_MAX_CHARS 4096
+#endif
+
+static int
+parse_arg_lpm(const char *arg)
+{
+ const char *p = arg, *p0;
+
+	if (strnlen(arg, APP_ARG_LPM_MAX_CHARS + 1) == APP_ARG_LPM_MAX_CHARS + 1) {
+ return -1;
+ }
+
+ while (*p != 0) {
+ uint32_t ip_a, ip_b, ip_c, ip_d, ip, depth, if_out;
+ char *endptr;
+
+ p0 = strchr(p, '/');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, '.', 4, &ip_a, &ip_b, &ip_c, &ip_d) != 4)) {
+ return -2;
+ }
+
+ p = p0 + 1;
+ errno = 0;
+ depth = strtoul(p, &endptr, 0);
+ if (errno != 0 || *endptr != '=') {
+ return -3;
+ }
+ p = strchr(p, '>');
+ if (p == NULL) {
+ return -4;
+ }
+ if_out = strtoul(++p, &endptr, 0);
+ if (errno != 0 || (*endptr != '\0' && *endptr != ';')) {
+ return -5;
+ }
+
+ if ((ip_a >= 256) || (ip_b >= 256) || (ip_c >= 256) || (ip_d >= 256) ||
+ (depth == 0) || (depth >= 32) ||
+ (if_out >= APP_MAX_NIC_PORTS)) {
+ return -6;
+ }
+ ip = (ip_a << 24) | (ip_b << 16) | (ip_c << 8) | ip_d;
+
+ if (app.n_lpm_rules >= APP_MAX_LPM_RULES) {
+ return -7;
+ }
+ app.lpm_rules[app.n_lpm_rules].ip = ip;
+ app.lpm_rules[app.n_lpm_rules].depth = (uint8_t) depth;
+ app.lpm_rules[app.n_lpm_rules].if_out = (uint8_t) if_out;
+ app.n_lpm_rules ++;
+
+ p = strchr(p, ';');
+ if (p == NULL) {
+ return -8;
+ }
+ p ++;
+ }
+
+ if (app.n_lpm_rules == 0) {
+ return -9;
+ }
+
+ return 0;
+}
+
+static int
+app_check_lpm_table(void)
+{
+ uint32_t rule;
+
+ /* For each rule, check that the output I/F is enabled */
+ for (rule = 0; rule < app.n_lpm_rules; rule ++)
+ {
+ uint32_t port = app.lpm_rules[rule].if_out;
+
+ if (app.nic_tx_port_mask[port] == 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+app_check_every_rx_port_is_tx_enabled(void)
+{
+ uint8_t port;
+
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ if ((app_get_nic_rx_queues_per_port(port) > 0) && (app.nic_tx_port_mask[port] == 0)) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_RSZ_CHARS
+#define APP_ARG_RSZ_CHARS 63
+#endif
+
+static int
+parse_arg_rsz(const char *arg)
+{
+ if (strnlen(arg, APP_ARG_RSZ_CHARS + 1) == APP_ARG_RSZ_CHARS + 1) {
+ return -1;
+ }
+
+ if (str_to_unsigned_vals(arg, APP_ARG_RSZ_CHARS, ',', 4,
+ &app.nic_rx_ring_size,
+ &app.ring_rx_size,
+ &app.ring_tx_size,
+ &app.nic_tx_ring_size) != 4)
+ return -2;
+
+
+ if ((app.nic_rx_ring_size == 0) ||
+ (app.nic_tx_ring_size == 0) ||
+ (app.ring_rx_size == 0) ||
+ (app.ring_tx_size == 0)) {
+ return -3;
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_BSZ_CHARS
+#define APP_ARG_BSZ_CHARS 63
+#endif
+
+static int
+parse_arg_bsz(const char *arg)
+{
+ const char *p = arg, *p0;
+ if (strnlen(arg, APP_ARG_BSZ_CHARS + 1) == APP_ARG_BSZ_CHARS + 1) {
+ return -1;
+ }
+
+ p0 = strchr(p++, ')');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_io_rx_read, &app.burst_size_io_rx_write) != 2)) {
+ return -2;
+ }
+
+ p = strchr(p0, '(');
+ if (p == NULL) {
+ return -3;
+ }
+
+ p0 = strchr(p++, ')');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_worker_read, &app.burst_size_worker_write) != 2)) {
+ return -4;
+ }
+
+ p = strchr(p0, '(');
+ if (p == NULL) {
+ return -5;
+ }
+
+ p0 = strchr(p++, ')');
+ if ((p0 == NULL) ||
+ (str_to_unsigned_vals(p, p0 - p, ',', 2, &app.burst_size_io_tx_read, &app.burst_size_io_tx_write) != 2)) {
+ return -6;
+ }
+
+ if ((app.burst_size_io_rx_read == 0) ||
+ (app.burst_size_io_rx_write == 0) ||
+ (app.burst_size_worker_read == 0) ||
+ (app.burst_size_worker_write == 0) ||
+ (app.burst_size_io_tx_read == 0) ||
+ (app.burst_size_io_tx_write == 0)) {
+ return -7;
+ }
+
+ if ((app.burst_size_io_rx_read > APP_MBUF_ARRAY_SIZE) ||
+ (app.burst_size_io_rx_write > APP_MBUF_ARRAY_SIZE) ||
+ (app.burst_size_worker_read > APP_MBUF_ARRAY_SIZE) ||
+ (app.burst_size_worker_write > APP_MBUF_ARRAY_SIZE) ||
+ ((2 * app.burst_size_io_tx_read) > APP_MBUF_ARRAY_SIZE) ||
+ (app.burst_size_io_tx_write > APP_MBUF_ARRAY_SIZE)) {
+ return -8;
+ }
+
+ return 0;
+}
+
+#ifndef APP_ARG_NUMERICAL_SIZE_CHARS
+#define APP_ARG_NUMERICAL_SIZE_CHARS 15
+#endif
+
+static int
+parse_arg_pos_lb(const char *arg)
+{
+ uint32_t x;
+ char *endpt;
+
+ if (strnlen(arg, APP_ARG_NUMERICAL_SIZE_CHARS + 1) == APP_ARG_NUMERICAL_SIZE_CHARS + 1) {
+ return -1;
+ }
+
+ errno = 0;
+ x = strtoul(arg, &endpt, 10);
+ if (errno != 0 || endpt == arg || *endpt != '\0'){
+ return -2;
+ }
+
+ if (x >= 64) {
+ return -3;
+ }
+
+ app.pos_lb = (uint8_t) x;
+
+ return 0;
+}
+
+/* Parse the argument given in the command line of the application */
+int
+app_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"rx", 1, 0, 0},
+ {"tx", 1, 0, 0},
+ {"w", 1, 0, 0},
+ {"lpm", 1, 0, 0},
+ {"rsz", 1, 0, 0},
+ {"bsz", 1, 0, 0},
+ {"pos-lb", 1, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+ uint32_t arg_w = 0;
+ uint32_t arg_rx = 0;
+ uint32_t arg_tx = 0;
+ uint32_t arg_lpm = 0;
+ uint32_t arg_rsz = 0;
+ uint32_t arg_bsz = 0;
+ uint32_t arg_pos_lb = 0;
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* long options */
+ case 0:
+ if (!strcmp(lgopts[option_index].name, "rx")) {
+ arg_rx = 1;
+ ret = parse_arg_rx(optarg);
+ if (ret) {
+ printf("Incorrect value for --rx argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "tx")) {
+ arg_tx = 1;
+ ret = parse_arg_tx(optarg);
+ if (ret) {
+ printf("Incorrect value for --tx argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "w")) {
+ arg_w = 1;
+ ret = parse_arg_w(optarg);
+ if (ret) {
+ printf("Incorrect value for --w argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "lpm")) {
+ arg_lpm = 1;
+ ret = parse_arg_lpm(optarg);
+ if (ret) {
+ printf("Incorrect value for --lpm argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "rsz")) {
+ arg_rsz = 1;
+ ret = parse_arg_rsz(optarg);
+ if (ret) {
+ printf("Incorrect value for --rsz argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "bsz")) {
+ arg_bsz = 1;
+ ret = parse_arg_bsz(optarg);
+ if (ret) {
+ printf("Incorrect value for --bsz argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ if (!strcmp(lgopts[option_index].name, "pos-lb")) {
+ arg_pos_lb = 1;
+ ret = parse_arg_pos_lb(optarg);
+ if (ret) {
+ printf("Incorrect value for --pos-lb argument (%d)\n", ret);
+ return -1;
+ }
+ }
+ break;
+
+ default:
+ return -1;
+ }
+ }
+
+ /* Check that all mandatory arguments are provided */
+ if ((arg_rx == 0) || (arg_tx == 0) || (arg_w == 0) || (arg_lpm == 0)) {
+ printf("Not all mandatory arguments are present\n");
+ return -1;
+ }
+
+ /* Assign default values for the optional arguments not provided */
+ if (arg_rsz == 0) {
+ app.nic_rx_ring_size = APP_DEFAULT_NIC_RX_RING_SIZE;
+ app.nic_tx_ring_size = APP_DEFAULT_NIC_TX_RING_SIZE;
+ app.ring_rx_size = APP_DEFAULT_RING_RX_SIZE;
+ app.ring_tx_size = APP_DEFAULT_RING_TX_SIZE;
+ }
+
+ if (arg_bsz == 0) {
+ app.burst_size_io_rx_read = APP_DEFAULT_BURST_SIZE_IO_RX_READ;
+ app.burst_size_io_rx_write = APP_DEFAULT_BURST_SIZE_IO_RX_WRITE;
+ app.burst_size_io_tx_read = APP_DEFAULT_BURST_SIZE_IO_TX_READ;
+ app.burst_size_io_tx_write = APP_DEFAULT_BURST_SIZE_IO_TX_WRITE;
+ app.burst_size_worker_read = APP_DEFAULT_BURST_SIZE_WORKER_READ;
+ app.burst_size_worker_write = APP_DEFAULT_BURST_SIZE_WORKER_WRITE;
+ }
+
+ if (arg_pos_lb == 0) {
+ app.pos_lb = APP_DEFAULT_IO_RX_LB_POS;
+ }
+
+ /* Check cross-consistency of arguments */
+ if ((ret = app_check_lpm_table()) < 0) {
+ printf("At least one LPM rule is inconsistent (%d)\n", ret);
+ return -1;
+ }
+ if (app_check_every_rx_port_is_tx_enabled() < 0) {
+ printf("On an LPM lookup miss, the packet is sent back on its input port.\n");
+ printf("At least one RX port is not enabled for TX.\n");
+ return -2;
+ }
+
+ if (optind >= 0)
+ argv[optind - 1] = prgname;
+
+ ret = optind - 1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+int
+app_get_nic_rx_queues_per_port(uint8_t port)
+{
+ uint32_t i, count;
+
+ if (port >= APP_MAX_NIC_PORTS) {
+ return -1;
+ }
+
+ count = 0;
+ for (i = 0; i < APP_MAX_RX_QUEUES_PER_NIC_PORT; i ++) {
+ if (app.nic_rx_queue_mask[port][i] == 1) {
+ count ++;
+ }
+ }
+
+ return count;
+}
+
+int
+app_get_lcore_for_nic_rx(uint8_t port, uint8_t queue, uint32_t *lcore_out)
+{
+ uint32_t lcore;
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
+ uint32_t i;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_IO) {
+ continue;
+ }
+
+ const size_t n_queues = RTE_MIN(lp->rx.n_nic_queues,
+ RTE_DIM(lp->rx.nic_queues));
+ for (i = 0; i < n_queues; i ++) {
+ if ((lp->rx.nic_queues[i].port == port) &&
+ (lp->rx.nic_queues[i].queue == queue)) {
+ *lcore_out = lcore;
+ return 0;
+ }
+ }
+ }
+
+ return -1;
+}
+
+int
+app_get_lcore_for_nic_tx(uint8_t port, uint32_t *lcore_out)
+{
+ uint32_t lcore;
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
+ uint32_t i;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_IO) {
+ continue;
+ }
+
+ const size_t n_ports = RTE_MIN(lp->tx.n_nic_ports,
+ RTE_DIM(lp->tx.nic_ports));
+ for (i = 0; i < n_ports; i ++) {
+ if (lp->tx.nic_ports[i] == port) {
+ *lcore_out = lcore;
+ return 0;
+ }
+ }
+ }
+
+ return -1;
+}
+
+int
+app_is_socket_used(uint32_t socket)
+{
+ uint32_t lcore;
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
+ continue;
+ }
+
+ if (socket == rte_lcore_to_socket_id(lcore)) {
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+uint32_t
+app_get_lcores_io_rx(void)
+{
+ uint32_t lcore, count;
+
+ count = 0;
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp_io->rx.n_nic_queues == 0)) {
+ continue;
+ }
+
+ count ++;
+ }
+
+ return count;
+}
+
+uint32_t
+app_get_lcores_worker(void)
+{
+ uint32_t lcore, count;
+
+ count = 0;
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ count ++;
+ }
+
+ if (count > APP_MAX_WORKER_LCORES) {
+ rte_panic("Algorithmic error (too many worker lcores)\n");
+ return 0;
+ }
+
+ return count;
+}
+
+void
+app_print_params(void)
+{
+ unsigned port, queue, lcore, rule, i, j;
+
+ /* Print NIC RX configuration */
+ printf("NIC RX ports: ");
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ uint32_t n_rx_queues = app_get_nic_rx_queues_per_port((uint8_t) port);
+
+ if (n_rx_queues == 0) {
+ continue;
+ }
+
+ printf("%u (", port);
+ for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
+ if (app.nic_rx_queue_mask[port][queue] == 1) {
+ printf("%u ", queue);
+ }
+ }
+ printf(") ");
+ }
+ printf(";\n");
+
+ /* Print I/O lcore RX params */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp->rx.n_nic_queues == 0)) {
+ continue;
+ }
+
+ printf("I/O lcore %u (socket %u): ", lcore, rte_lcore_to_socket_id(lcore));
+
+ printf("RX ports ");
+ for (i = 0; i < lp->rx.n_nic_queues; i ++) {
+ printf("(%u, %u) ",
+ (unsigned) lp->rx.nic_queues[i].port,
+ (unsigned) lp->rx.nic_queues[i].queue);
+ }
+ printf("; ");
+
+ printf("Output rings ");
+ for (i = 0; i < lp->rx.n_rings; i ++) {
+ printf("%p ", lp->rx.rings[i]);
+ }
+ printf(";\n");
+ }
+
+ /* Print worker lcore RX params */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ printf("Worker lcore %u (socket %u) ID %u: ",
+ lcore,
+ rte_lcore_to_socket_id(lcore),
+ (unsigned)lp->worker_id);
+
+ printf("Input rings ");
+ for (i = 0; i < lp->n_rings_in; i ++) {
+ printf("%p ", lp->rings_in[i]);
+ }
+
+ printf(";\n");
+ }
+
+ printf("\n");
+
+ /* Print NIC TX configuration */
+ printf("NIC TX ports: ");
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ if (app.nic_tx_port_mask[port] == 1) {
+ printf("%u ", port);
+ }
+ }
+ printf(";\n");
+
+ /* Print I/O TX lcore params */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
+ uint32_t n_workers = app_get_lcores_worker();
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp->tx.n_nic_ports == 0)) {
+ continue;
+ }
+
+ printf("I/O lcore %u (socket %u): ", lcore, rte_lcore_to_socket_id(lcore));
+
+ printf("Input rings per TX port ");
+ for (i = 0; i < lp->tx.n_nic_ports; i ++) {
+ port = lp->tx.nic_ports[i];
+
+ printf("%u (", port);
+ for (j = 0; j < n_workers; j ++) {
+ printf("%p ", lp->tx.rings[port][j]);
+ }
+ printf(") ");
+
+ }
+
+ printf(";\n");
+ }
+
+ /* Print worker lcore TX params */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ printf("Worker lcore %u (socket %u) ID %u: \n",
+ lcore,
+ rte_lcore_to_socket_id(lcore),
+ (unsigned)lp->worker_id);
+
+ printf("Output rings per TX port ");
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ if (lp->rings_out[port] != NULL) {
+ printf("%u (%p) ", port, lp->rings_out[port]);
+ }
+ }
+
+ printf(";\n");
+ }
+
+ /* Print LPM rules */
+ printf("LPM rules: \n");
+ for (rule = 0; rule < app.n_lpm_rules; rule ++) {
+ uint32_t ip = app.lpm_rules[rule].ip;
+ uint8_t depth = app.lpm_rules[rule].depth;
+ uint8_t if_out = app.lpm_rules[rule].if_out;
+
+ printf("\t%u: %u.%u.%u.%u/%u => %u;\n",
+ rule,
+ (unsigned) (ip & 0xFF000000) >> 24,
+ (unsigned) (ip & 0x00FF0000) >> 16,
+ (unsigned) (ip & 0x0000FF00) >> 8,
+ (unsigned) ip & 0x000000FF,
+ (unsigned) depth,
+ (unsigned) if_out
+ );
+ }
+
+ /* Rings */
+ printf("Ring sizes: NIC RX = %u; Worker in = %u; Worker out = %u; NIC TX = %u;\n",
+ (unsigned) app.nic_rx_ring_size,
+ (unsigned) app.ring_rx_size,
+ (unsigned) app.ring_tx_size,
+ (unsigned) app.nic_tx_ring_size);
+
+ /* Bursts */
+ printf("Burst sizes: I/O RX (rd = %u, wr = %u); Worker (rd = %u, wr = %u); I/O TX (rd = %u, wr = %u)\n",
+ (unsigned) app.burst_size_io_rx_read,
+ (unsigned) app.burst_size_io_rx_write,
+ (unsigned) app.burst_size_worker_read,
+ (unsigned) app.burst_size_worker_write,
+ (unsigned) app.burst_size_io_tx_read,
+ (unsigned) app.burst_size_io_tx_write);
+}
diff --git a/examples/load_balancer/init.c b/examples/load_balancer/init.c
new file mode 100644
index 00000000..e07850be
--- /dev/null
+++ b/examples/load_balancer/init.c
@@ -0,0 +1,521 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_lpm.h>
+
+#include "main.h"
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static void
+app_assign_worker_ids(void)
+{
+ uint32_t lcore, worker_id;
+
+ /* Assign ID for each worker */
+ worker_id = 0;
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ lp_worker->worker_id = worker_id;
+ worker_id ++;
+ }
+}
+
+static void
+app_init_mbuf_pools(void)
+{
+ unsigned socket, lcore;
+
+ /* Init the buffer pools */
+ for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
+ char name[32];
+ if (app_is_socket_used(socket) == 0) {
+ continue;
+ }
+
+ snprintf(name, sizeof(name), "mbuf_pool_%u", socket);
+ printf("Creating the mbuf pool for socket %u ...\n", socket);
+ app.pools[socket] = rte_pktmbuf_pool_create(
+ name, APP_DEFAULT_MEMPOOL_BUFFERS,
+ APP_DEFAULT_MEMPOOL_CACHE_SIZE,
+ 0, APP_DEFAULT_MBUF_DATA_SIZE, socket);
+ if (app.pools[socket] == NULL) {
+ rte_panic("Cannot create mbuf pool on socket %u\n", socket);
+ }
+ }
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ if (app.lcore_params[lcore].type == e_APP_LCORE_DISABLED) {
+ continue;
+ }
+
+ socket = rte_lcore_to_socket_id(lcore);
+ app.lcore_params[lcore].pool = app.pools[socket];
+ }
+}
+
+static void
+app_init_lpm_tables(void)
+{
+ unsigned socket, lcore;
+
+ /* Init the LPM tables */
+ for (socket = 0; socket < APP_MAX_SOCKETS; socket ++) {
+ char name[32];
+ uint32_t rule;
+
+ if (app_is_socket_used(socket) == 0) {
+ continue;
+ }
+
+ struct rte_lpm_config lpm_config;
+
+ lpm_config.max_rules = APP_MAX_LPM_RULES;
+ lpm_config.number_tbl8s = 256;
+ lpm_config.flags = 0;
+ snprintf(name, sizeof(name), "lpm_table_%u", socket);
+ printf("Creating the LPM table for socket %u ...\n", socket);
+ app.lpm_tables[socket] = rte_lpm_create(
+ name,
+ socket,
+ &lpm_config);
+ if (app.lpm_tables[socket] == NULL) {
+ rte_panic("Unable to create LPM table on socket %u\n", socket);
+ }
+
+ for (rule = 0; rule < app.n_lpm_rules; rule ++) {
+ int ret;
+
+ ret = rte_lpm_add(app.lpm_tables[socket],
+ app.lpm_rules[rule].ip,
+ app.lpm_rules[rule].depth,
+ app.lpm_rules[rule].if_out);
+
+ if (ret < 0) {
+ rte_panic("Unable to add entry %u (%x/%u => %u) to the LPM table on socket %u (%d)\n",
+ (unsigned) rule,
+ (unsigned) app.lpm_rules[rule].ip,
+ (unsigned) app.lpm_rules[rule].depth,
+ (unsigned) app.lpm_rules[rule].if_out,
+ socket,
+ ret);
+ }
+ }
+
+ }
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ socket = rte_lcore_to_socket_id(lcore);
+ app.lcore_params[lcore].worker.lpm_table = app.lpm_tables[socket];
+ }
+}
+
+static void
+app_init_rings_rx(void)
+{
+ unsigned lcore;
+
+ /* Initialize the rings for the RX side */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
+ unsigned socket_io, lcore_worker;
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp_io->rx.n_nic_queues == 0)) {
+ continue;
+ }
+
+ socket_io = rte_lcore_to_socket_id(lcore);
+
+ for (lcore_worker = 0; lcore_worker < APP_MAX_LCORES; lcore_worker ++) {
+ char name[32];
+ struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore_worker].worker;
+ struct rte_ring *ring = NULL;
+
+ if (app.lcore_params[lcore_worker].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ printf("Creating ring to connect I/O lcore %u (socket %u) with worker lcore %u ...\n",
+ lcore,
+ socket_io,
+ lcore_worker);
+ snprintf(name, sizeof(name), "app_ring_rx_s%u_io%u_w%u",
+ socket_io,
+ lcore,
+ lcore_worker);
+ ring = rte_ring_create(
+ name,
+ app.ring_rx_size,
+ socket_io,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (ring == NULL) {
+ rte_panic("Cannot create ring to connect I/O core %u with worker core %u\n",
+ lcore,
+ lcore_worker);
+ }
+
+ lp_io->rx.rings[lp_io->rx.n_rings] = ring;
+ lp_io->rx.n_rings ++;
+
+ lp_worker->rings_in[lp_worker->n_rings_in] = ring;
+ lp_worker->n_rings_in ++;
+ }
+ }
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp_io->rx.n_nic_queues == 0)) {
+ continue;
+ }
+
+ if (lp_io->rx.n_rings != app_get_lcores_worker()) {
+ rte_panic("Algorithmic error (I/O RX rings)\n");
+ }
+ }
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ if (lp_worker->n_rings_in != app_get_lcores_io_rx()) {
+ rte_panic("Algorithmic error (worker input rings)\n");
+ }
+ }
+}
+
+static void
+app_init_rings_tx(void)
+{
+ unsigned lcore;
+
+ /* Initialize the rings for the TX side */
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_worker *lp_worker = &app.lcore_params[lcore].worker;
+ unsigned port;
+
+ if (app.lcore_params[lcore].type != e_APP_LCORE_WORKER) {
+ continue;
+ }
+
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ char name[32];
+ struct app_lcore_params_io *lp_io = NULL;
+ struct rte_ring *ring;
+ uint32_t socket_io, lcore_io;
+
+ if (app.nic_tx_port_mask[port] == 0) {
+ continue;
+ }
+
+ if (app_get_lcore_for_nic_tx((uint8_t) port, &lcore_io) < 0) {
+ rte_panic("Algorithmic error (no I/O core to handle TX of port %u)\n",
+ port);
+ }
+
+ lp_io = &app.lcore_params[lcore_io].io;
+ socket_io = rte_lcore_to_socket_id(lcore_io);
+
+ printf("Creating ring to connect worker lcore %u with TX port %u (through I/O lcore %u) (socket %u) ...\n",
+ lcore, port, (unsigned)lcore_io, (unsigned)socket_io);
+ snprintf(name, sizeof(name), "app_ring_tx_s%u_w%u_p%u", socket_io, lcore, port);
+ ring = rte_ring_create(
+ name,
+ app.ring_tx_size,
+ socket_io,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (ring == NULL) {
+ rte_panic("Cannot create ring to connect worker core %u with TX port %u\n",
+ lcore,
+ port);
+ }
+
+ lp_worker->rings_out[port] = ring;
+ lp_io->tx.rings[port][lp_worker->worker_id] = ring;
+ }
+ }
+
+ for (lcore = 0; lcore < APP_MAX_LCORES; lcore ++) {
+ struct app_lcore_params_io *lp_io = &app.lcore_params[lcore].io;
+ unsigned i;
+
+ if ((app.lcore_params[lcore].type != e_APP_LCORE_IO) ||
+ (lp_io->tx.n_nic_ports == 0)) {
+ continue;
+ }
+
+ for (i = 0; i < lp_io->tx.n_nic_ports; i ++) {
+ unsigned port, j;
+
+ port = lp_io->tx.nic_ports[i];
+ for (j = 0; j < app_get_lcores_worker(); j ++) {
+ if (lp_io->tx.rings[port][j] == NULL) {
+ rte_panic("Algorithmic error (I/O TX rings)\n");
+ }
+ }
+ }
+ }
+}
+
+/* Check the link status of all ports, waiting up to 9s, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+ uint32_t n_rx_queues, n_tx_queues;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ n_rx_queues = app_get_nic_rx_queues_per_port(portid);
+ n_tx_queues = app.nic_tx_port_mask[portid];
+ if ((n_rx_queues == 0) && (n_tx_queues == 0))
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+static void
+app_init_nics(void)
+{
+ unsigned socket;
+ uint32_t lcore;
+ uint8_t port, queue;
+ int ret;
+ uint32_t n_rx_queues, n_tx_queues;
+
+ /* Init NIC ports and queues, then start the ports */
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ struct rte_mempool *pool;
+
+ n_rx_queues = app_get_nic_rx_queues_per_port(port);
+ n_tx_queues = app.nic_tx_port_mask[port];
+
+ if ((n_rx_queues == 0) && (n_tx_queues == 0)) {
+ continue;
+ }
+
+ /* Init port */
+ printf("Initializing NIC port %u ...\n", (unsigned) port);
+ ret = rte_eth_dev_configure(
+ port,
+ (uint8_t) n_rx_queues,
+ (uint8_t) n_tx_queues,
+ &port_conf);
+ if (ret < 0) {
+ rte_panic("Cannot init NIC port %u (%d)\n", (unsigned) port, ret);
+ }
+ rte_eth_promiscuous_enable(port);
+
+ /* Init RX queues */
+ for (queue = 0; queue < APP_MAX_RX_QUEUES_PER_NIC_PORT; queue ++) {
+ if (app.nic_rx_queue_mask[port][queue] == 0) {
+ continue;
+ }
+
+ app_get_lcore_for_nic_rx(port, queue, &lcore);
+ socket = rte_lcore_to_socket_id(lcore);
+ pool = app.lcore_params[lcore].pool;
+
+ printf("Initializing NIC port %u RX queue %u ...\n",
+ (unsigned) port,
+ (unsigned) queue);
+ ret = rte_eth_rx_queue_setup(
+ port,
+ queue,
+ (uint16_t) app.nic_rx_ring_size,
+ socket,
+ NULL,
+ pool);
+ if (ret < 0) {
+ rte_panic("Cannot init RX queue %u for port %u (%d)\n",
+ (unsigned) queue,
+ (unsigned) port,
+ ret);
+ }
+ }
+
+ /* Init TX queues */
+ if (app.nic_tx_port_mask[port] == 1) {
+ app_get_lcore_for_nic_tx(port, &lcore);
+ socket = rte_lcore_to_socket_id(lcore);
+ printf("Initializing NIC port %u TX queue 0 ...\n",
+ (unsigned) port);
+ ret = rte_eth_tx_queue_setup(
+ port,
+ 0,
+ (uint16_t) app.nic_tx_ring_size,
+ socket,
+ NULL);
+ if (ret < 0) {
+ rte_panic("Cannot init TX queue 0 for port %d (%d)\n",
+ port,
+ ret);
+ }
+ }
+
+ /* Start port */
+ ret = rte_eth_dev_start(port);
+ if (ret < 0) {
+ rte_panic("Cannot start port %d (%d)\n", port, ret);
+ }
+ }
+
+ check_all_ports_link_status(APP_MAX_NIC_PORTS, (~0x0));
+}
+
+void
+app_init(void)
+{
+ app_assign_worker_ids();
+ app_init_mbuf_pools();
+ app_init_lpm_tables();
+ app_init_rings_rx();
+ app_init_rings_tx();
+ app_init_nics();
+
+ printf("Initialization completed.\n");
+}
diff --git a/examples/load_balancer/main.c b/examples/load_balancer/main.c
new file mode 100644
index 00000000..7ede3585
--- /dev/null
+++ b/examples/load_balancer/main.c
@@ -0,0 +1,109 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_lpm.h>
+
+#include "main.h"
+
+int
+main(int argc, char **argv)
+{
+ uint32_t lcore;
+ int ret;
+
+ /* Init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ return -1;
+ argc -= ret;
+ argv += ret;
+
+ /* Parse application arguments (after the EAL ones) */
+ ret = app_parse_args(argc, argv);
+ if (ret < 0) {
+ app_print_usage();
+ return -1;
+ }
+
+ /* Init */
+ app_init();
+ app_print_params();
+
+ /* Launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(app_lcore_main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ if (rte_eal_wait_lcore(lcore) < 0) {
+ return -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/load_balancer/main.h b/examples/load_balancer/main.h
new file mode 100644
index 00000000..d98468a7
--- /dev/null
+++ b/examples/load_balancer/main.h
@@ -0,0 +1,371 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+/* Logical cores */
+#ifndef APP_MAX_SOCKETS
+#define APP_MAX_SOCKETS 2
+#endif
+
+#ifndef APP_MAX_LCORES
+#define APP_MAX_LCORES RTE_MAX_LCORE
+#endif
+
+#ifndef APP_MAX_NIC_PORTS
+#define APP_MAX_NIC_PORTS RTE_MAX_ETHPORTS
+#endif
+
+#ifndef APP_MAX_RX_QUEUES_PER_NIC_PORT
+#define APP_MAX_RX_QUEUES_PER_NIC_PORT 128
+#endif
+
+#ifndef APP_MAX_TX_QUEUES_PER_NIC_PORT
+#define APP_MAX_TX_QUEUES_PER_NIC_PORT 128
+#endif
+
+#ifndef APP_MAX_IO_LCORES
+#define APP_MAX_IO_LCORES 16
+#endif
+#if (APP_MAX_IO_LCORES > APP_MAX_LCORES)
+#error "APP_MAX_IO_LCORES is too big"
+#endif
+
+#ifndef APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE
+#define APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE 16
+#endif
+
+#ifndef APP_MAX_NIC_TX_PORTS_PER_IO_LCORE
+#define APP_MAX_NIC_TX_PORTS_PER_IO_LCORE 16
+#endif
+#if (APP_MAX_NIC_TX_PORTS_PER_IO_LCORE > APP_MAX_NIC_PORTS)
+#error "APP_MAX_NIC_TX_PORTS_PER_IO_LCORE too big"
+#endif
+
+#ifndef APP_MAX_WORKER_LCORES
+#define APP_MAX_WORKER_LCORES 16
+#endif
+#if (APP_MAX_WORKER_LCORES > APP_MAX_LCORES)
+#error "APP_MAX_WORKER_LCORES is too big"
+#endif
+
+
+/* Mempools */
+#ifndef APP_DEFAULT_MBUF_DATA_SIZE
+#define APP_DEFAULT_MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
+#endif
+
+#ifndef APP_DEFAULT_MEMPOOL_BUFFERS
+#define APP_DEFAULT_MEMPOOL_BUFFERS (8192 * 4)
+#endif
+
+#ifndef APP_DEFAULT_MEMPOOL_CACHE_SIZE
+#define APP_DEFAULT_MEMPOOL_CACHE_SIZE 256
+#endif
+
+/* LPM Tables */
+#ifndef APP_MAX_LPM_RULES
+#define APP_MAX_LPM_RULES 1024
+#endif
+
+/* NIC RX */
+#ifndef APP_DEFAULT_NIC_RX_RING_SIZE
+#define APP_DEFAULT_NIC_RX_RING_SIZE 1024
+#endif
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#ifndef APP_DEFAULT_NIC_RX_PTHRESH
+#define APP_DEFAULT_NIC_RX_PTHRESH 8
+#endif
+
+#ifndef APP_DEFAULT_NIC_RX_HTHRESH
+#define APP_DEFAULT_NIC_RX_HTHRESH 8
+#endif
+
+#ifndef APP_DEFAULT_NIC_RX_WTHRESH
+#define APP_DEFAULT_NIC_RX_WTHRESH 4
+#endif
+
+#ifndef APP_DEFAULT_NIC_RX_FREE_THRESH
+#define APP_DEFAULT_NIC_RX_FREE_THRESH 64
+#endif
+
+#ifndef APP_DEFAULT_NIC_RX_DROP_EN
+#define APP_DEFAULT_NIC_RX_DROP_EN 0
+#endif
+
+/* NIC TX */
+#ifndef APP_DEFAULT_NIC_TX_RING_SIZE
+#define APP_DEFAULT_NIC_TX_RING_SIZE 1024
+#endif
+
+/*
+ * These default values are optimized for use with the Intel(R) 82599 10 GbE
+ * Controller and the DPDK ixgbe PMD. Consider using other values for other
+ * network controllers and/or network drivers.
+ */
+#ifndef APP_DEFAULT_NIC_TX_PTHRESH
+#define APP_DEFAULT_NIC_TX_PTHRESH 36
+#endif
+
+#ifndef APP_DEFAULT_NIC_TX_HTHRESH
+#define APP_DEFAULT_NIC_TX_HTHRESH 0
+#endif
+
+#ifndef APP_DEFAULT_NIC_TX_WTHRESH
+#define APP_DEFAULT_NIC_TX_WTHRESH 0
+#endif
+
+#ifndef APP_DEFAULT_NIC_TX_FREE_THRESH
+#define APP_DEFAULT_NIC_TX_FREE_THRESH 0
+#endif
+
+#ifndef APP_DEFAULT_NIC_TX_RS_THRESH
+#define APP_DEFAULT_NIC_TX_RS_THRESH 0
+#endif
+
+/* Software Rings */
+#ifndef APP_DEFAULT_RING_RX_SIZE
+#define APP_DEFAULT_RING_RX_SIZE 1024
+#endif
+
+#ifndef APP_DEFAULT_RING_TX_SIZE
+#define APP_DEFAULT_RING_TX_SIZE 1024
+#endif
+
+/* Bursts */
+#ifndef APP_MBUF_ARRAY_SIZE
+#define APP_MBUF_ARRAY_SIZE 512
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_IO_RX_READ
+#define APP_DEFAULT_BURST_SIZE_IO_RX_READ 144
+#endif
+#if (APP_DEFAULT_BURST_SIZE_IO_RX_READ > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_IO_RX_READ is too big"
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_IO_RX_WRITE
+#define APP_DEFAULT_BURST_SIZE_IO_RX_WRITE 144
+#endif
+#if (APP_DEFAULT_BURST_SIZE_IO_RX_WRITE > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_IO_RX_WRITE is too big"
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_IO_TX_READ
+#define APP_DEFAULT_BURST_SIZE_IO_TX_READ 144
+#endif
+#if (APP_DEFAULT_BURST_SIZE_IO_TX_READ > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_IO_TX_READ is too big"
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_IO_TX_WRITE
+#define APP_DEFAULT_BURST_SIZE_IO_TX_WRITE 144
+#endif
+#if (APP_DEFAULT_BURST_SIZE_IO_TX_WRITE > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_IO_TX_WRITE is too big"
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_WORKER_READ
+#define APP_DEFAULT_BURST_SIZE_WORKER_READ 144
+#endif
+#if ((2 * APP_DEFAULT_BURST_SIZE_WORKER_READ) > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_WORKER_READ is too big"
+#endif
+
+#ifndef APP_DEFAULT_BURST_SIZE_WORKER_WRITE
+#define APP_DEFAULT_BURST_SIZE_WORKER_WRITE 144
+#endif
+#if (APP_DEFAULT_BURST_SIZE_WORKER_WRITE > APP_MBUF_ARRAY_SIZE)
+#error "APP_DEFAULT_BURST_SIZE_WORKER_WRITE is too big"
+#endif
+
+/* Load balancing logic */
+#ifndef APP_DEFAULT_IO_RX_LB_POS
+#define APP_DEFAULT_IO_RX_LB_POS 29
+#endif
+#if (APP_DEFAULT_IO_RX_LB_POS >= 64)
+#error "APP_DEFAULT_IO_RX_LB_POS is too big"
+#endif
+
+struct app_mbuf_array {
+ struct rte_mbuf *array[APP_MBUF_ARRAY_SIZE];
+ uint32_t n_mbufs;
+};
+
+enum app_lcore_type {
+ e_APP_LCORE_DISABLED = 0,
+ e_APP_LCORE_IO,
+ e_APP_LCORE_WORKER
+};
+
+struct app_lcore_params_io {
+ /* I/O RX */
+ struct {
+ /* NIC */
+ struct {
+ uint8_t port;
+ uint8_t queue;
+ } nic_queues[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
+ uint32_t n_nic_queues;
+
+ /* Rings */
+ struct rte_ring *rings[APP_MAX_WORKER_LCORES];
+ uint32_t n_rings;
+
+ /* Internal buffers */
+ struct app_mbuf_array mbuf_in;
+ struct app_mbuf_array mbuf_out[APP_MAX_WORKER_LCORES];
+ uint8_t mbuf_out_flush[APP_MAX_WORKER_LCORES];
+
+ /* Stats */
+ uint32_t nic_queues_count[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
+ uint32_t nic_queues_iters[APP_MAX_NIC_RX_QUEUES_PER_IO_LCORE];
+ uint32_t rings_count[APP_MAX_WORKER_LCORES];
+ uint32_t rings_iters[APP_MAX_WORKER_LCORES];
+ } rx;
+
+ /* I/O TX */
+ struct {
+ /* Rings */
+ struct rte_ring *rings[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];
+
+ /* NIC */
+ uint8_t nic_ports[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
+ uint32_t n_nic_ports;
+
+ /* Internal buffers */
+ struct app_mbuf_array mbuf_out[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
+ uint8_t mbuf_out_flush[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
+
+ /* Stats */
+ uint32_t rings_count[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];
+ uint32_t rings_iters[APP_MAX_NIC_PORTS][APP_MAX_WORKER_LCORES];
+ uint32_t nic_ports_count[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
+ uint32_t nic_ports_iters[APP_MAX_NIC_TX_PORTS_PER_IO_LCORE];
+ } tx;
+};
+
+struct app_lcore_params_worker {
+ /* Rings */
+ struct rte_ring *rings_in[APP_MAX_IO_LCORES];
+ uint32_t n_rings_in;
+ struct rte_ring *rings_out[APP_MAX_NIC_PORTS];
+
+ /* LPM table */
+ struct rte_lpm *lpm_table;
+ uint32_t worker_id;
+
+ /* Internal buffers */
+ struct app_mbuf_array mbuf_in;
+ struct app_mbuf_array mbuf_out[APP_MAX_NIC_PORTS];
+ uint8_t mbuf_out_flush[APP_MAX_NIC_PORTS];
+
+ /* Stats */
+ uint32_t rings_in_count[APP_MAX_IO_LCORES];
+ uint32_t rings_in_iters[APP_MAX_IO_LCORES];
+ uint32_t rings_out_count[APP_MAX_NIC_PORTS];
+ uint32_t rings_out_iters[APP_MAX_NIC_PORTS];
+};
+
+struct app_lcore_params {
+ union {
+ struct app_lcore_params_io io;
+ struct app_lcore_params_worker worker;
+ };
+ enum app_lcore_type type;
+ struct rte_mempool *pool;
+} __rte_cache_aligned;
+
+struct app_lpm_rule {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+struct app_params {
+ /* lcore */
+ struct app_lcore_params lcore_params[APP_MAX_LCORES];
+
+ /* NIC */
+ uint8_t nic_rx_queue_mask[APP_MAX_NIC_PORTS][APP_MAX_RX_QUEUES_PER_NIC_PORT];
+ uint8_t nic_tx_port_mask[APP_MAX_NIC_PORTS];
+
+ /* mbuf pools */
+ struct rte_mempool *pools[APP_MAX_SOCKETS];
+
+ /* LPM tables */
+ struct rte_lpm *lpm_tables[APP_MAX_SOCKETS];
+ struct app_lpm_rule lpm_rules[APP_MAX_LPM_RULES];
+ uint32_t n_lpm_rules;
+
+ /* rings */
+ uint32_t nic_rx_ring_size;
+ uint32_t nic_tx_ring_size;
+ uint32_t ring_rx_size;
+ uint32_t ring_tx_size;
+
+ /* burst size */
+ uint32_t burst_size_io_rx_read;
+ uint32_t burst_size_io_rx_write;
+ uint32_t burst_size_io_tx_read;
+ uint32_t burst_size_io_tx_write;
+ uint32_t burst_size_worker_read;
+ uint32_t burst_size_worker_write;
+
+ /* load balancing */
+ uint8_t pos_lb;
+} __rte_cache_aligned;
+
+extern struct app_params app;
+
+int app_parse_args(int argc, char **argv);
+void app_print_usage(void);
+void app_init(void);
+int app_lcore_main_loop(void *arg);
+
+int app_get_nic_rx_queues_per_port(uint8_t port);
+int app_get_lcore_for_nic_rx(uint8_t port, uint8_t queue, uint32_t *lcore_out);
+int app_get_lcore_for_nic_tx(uint8_t port, uint32_t *lcore_out);
+int app_is_socket_used(uint32_t socket);
+uint32_t app_get_lcores_io_rx(void);
+uint32_t app_get_lcores_worker(void);
+void app_print_params(void);
+
+#endif /* _MAIN_H_ */
diff --git a/examples/load_balancer/runtime.c b/examples/load_balancer/runtime.c
new file mode 100644
index 00000000..6944325d
--- /dev/null
+++ b/examples/load_balancer/runtime.c
@@ -0,0 +1,668 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_lpm.h>
+
+#include "main.h"
+
+#ifndef APP_LCORE_IO_FLUSH
+#define APP_LCORE_IO_FLUSH 1000000
+#endif
+
+#ifndef APP_LCORE_WORKER_FLUSH
+#define APP_LCORE_WORKER_FLUSH 1000000
+#endif
+
+#ifndef APP_STATS
+#define APP_STATS 1000000
+#endif
+
+#define APP_IO_RX_DROP_ALL_PACKETS 0
+#define APP_WORKER_DROP_ALL_PACKETS 0
+#define APP_IO_TX_DROP_ALL_PACKETS 0
+
+#ifndef APP_IO_RX_PREFETCH_ENABLE
+#define APP_IO_RX_PREFETCH_ENABLE 1
+#endif
+
+#ifndef APP_WORKER_PREFETCH_ENABLE
+#define APP_WORKER_PREFETCH_ENABLE 1
+#endif
+
+#ifndef APP_IO_TX_PREFETCH_ENABLE
+#define APP_IO_TX_PREFETCH_ENABLE 1
+#endif
+
+#if APP_IO_RX_PREFETCH_ENABLE
+#define APP_IO_RX_PREFETCH0(p) rte_prefetch0(p)
+#define APP_IO_RX_PREFETCH1(p) rte_prefetch1(p)
+#else
+#define APP_IO_RX_PREFETCH0(p)
+#define APP_IO_RX_PREFETCH1(p)
+#endif
+
+#if APP_WORKER_PREFETCH_ENABLE
+#define APP_WORKER_PREFETCH0(p) rte_prefetch0(p)
+#define APP_WORKER_PREFETCH1(p) rte_prefetch1(p)
+#else
+#define APP_WORKER_PREFETCH0(p)
+#define APP_WORKER_PREFETCH1(p)
+#endif
+
+#if APP_IO_TX_PREFETCH_ENABLE
+#define APP_IO_TX_PREFETCH0(p) rte_prefetch0(p)
+#define APP_IO_TX_PREFETCH1(p) rte_prefetch1(p)
+#else
+#define APP_IO_TX_PREFETCH0(p)
+#define APP_IO_TX_PREFETCH1(p)
+#endif
+
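+/*
+ * Buffer one RX mbuf for the given worker. Once the per-worker buffer holds
+ * bsz packets, the whole burst is enqueued onto that worker's ring; if the
+ * ring is full, the burst is dropped and its mbufs are freed.
+ */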
+static inline void
+app_lcore_io_rx_buffer_to_send (
+ struct app_lcore_params_io *lp,
+ uint32_t worker,
+ struct rte_mbuf *mbuf,
+ uint32_t bsz)
+{
+ uint32_t pos;
+ int ret;
+
+ pos = lp->rx.mbuf_out[worker].n_mbufs;
+ lp->rx.mbuf_out[worker].array[pos ++] = mbuf;
+ if (likely(pos < bsz)) {
+ lp->rx.mbuf_out[worker].n_mbufs = pos;
+ return;
+ }
+
+ ret = rte_ring_sp_enqueue_bulk(
+ lp->rx.rings[worker],
+ (void **) lp->rx.mbuf_out[worker].array,
+ bsz);
+
+ if (unlikely(ret == -ENOBUFS)) {
+ uint32_t k;
+ for (k = 0; k < bsz; k ++) {
+ struct rte_mbuf *m = lp->rx.mbuf_out[worker].array[k];
+ rte_pktmbuf_free(m);
+ }
+ }
+
+ lp->rx.mbuf_out[worker].n_mbufs = 0;
+ lp->rx.mbuf_out_flush[worker] = 0;
+
+#if APP_STATS
+ lp->rx.rings_iters[worker] ++;
+ if (likely(ret == 0)) {
+ lp->rx.rings_count[worker] ++;
+ }
+ if (unlikely(lp->rx.rings_iters[worker] == APP_STATS)) {
+ unsigned lcore = rte_lcore_id();
+
+ printf("\tI/O RX %u out (worker %u): enq success rate = %.2f\n",
+ lcore,
+ (unsigned)worker,
+ ((double) lp->rx.rings_count[worker]) / ((double) lp->rx.rings_iters[worker]));
+ lp->rx.rings_iters[worker] = 0;
+ lp->rx.rings_count[worker] = 0;
+ }
+#endif
+}
+
+static inline void
+app_lcore_io_rx(
+ struct app_lcore_params_io *lp,
+ uint32_t n_workers,
+ uint32_t bsz_rd,
+ uint32_t bsz_wr,
+ uint8_t pos_lb)
+{
+ struct rte_mbuf *mbuf_1_0, *mbuf_1_1, *mbuf_2_0, *mbuf_2_1;
+ uint8_t *data_1_0, *data_1_1 = NULL;
+ uint32_t i;
+
+ for (i = 0; i < lp->rx.n_nic_queues; i ++) {
+ uint8_t port = lp->rx.nic_queues[i].port;
+ uint8_t queue = lp->rx.nic_queues[i].queue;
+ uint32_t n_mbufs, j;
+
+ n_mbufs = rte_eth_rx_burst(
+ port,
+ queue,
+ lp->rx.mbuf_in.array,
+ (uint16_t) bsz_rd);
+
+ if (unlikely(n_mbufs == 0)) {
+ continue;
+ }
+
+#if APP_STATS
+ lp->rx.nic_queues_iters[i] ++;
+ lp->rx.nic_queues_count[i] += n_mbufs;
+ if (unlikely(lp->rx.nic_queues_iters[i] == APP_STATS)) {
+ struct rte_eth_stats stats;
+ unsigned lcore = rte_lcore_id();
+
+ rte_eth_stats_get(port, &stats);
+
+ printf("I/O RX %u in (NIC port %u): NIC drop ratio = %.2f avg burst size = %.2f\n",
+ lcore,
+ (unsigned) port,
+ (double) stats.imissed / (double) (stats.imissed + stats.ipackets),
+ ((double) lp->rx.nic_queues_count[i]) / ((double) lp->rx.nic_queues_iters[i]));
+ lp->rx.nic_queues_iters[i] = 0;
+ lp->rx.nic_queues_count[i] = 0;
+ }
+#endif
+
+#if APP_IO_RX_DROP_ALL_PACKETS
+ for (j = 0; j < n_mbufs; j ++) {
+ struct rte_mbuf *pkt = lp->rx.mbuf_in.array[j];
+ rte_pktmbuf_free(pkt);
+ }
+
+ continue;
+#endif
+
+ mbuf_1_0 = lp->rx.mbuf_in.array[0];
+ mbuf_1_1 = lp->rx.mbuf_in.array[1];
+ data_1_0 = rte_pktmbuf_mtod(mbuf_1_0, uint8_t *);
+ if (likely(n_mbufs > 1)) {
+ data_1_1 = rte_pktmbuf_mtod(mbuf_1_1, uint8_t *);
+ }
+
+ mbuf_2_0 = lp->rx.mbuf_in.array[2];
+ mbuf_2_1 = lp->rx.mbuf_in.array[3];
+ APP_IO_RX_PREFETCH0(mbuf_2_0);
+ APP_IO_RX_PREFETCH0(mbuf_2_1);
+
+ for (j = 0; j + 3 < n_mbufs; j += 2) {
+ struct rte_mbuf *mbuf_0_0, *mbuf_0_1;
+ uint8_t *data_0_0, *data_0_1;
+ uint32_t worker_0, worker_1;
+
+ mbuf_0_0 = mbuf_1_0;
+ mbuf_0_1 = mbuf_1_1;
+ data_0_0 = data_1_0;
+ data_0_1 = data_1_1;
+
+ mbuf_1_0 = mbuf_2_0;
+ mbuf_1_1 = mbuf_2_1;
+ data_1_0 = rte_pktmbuf_mtod(mbuf_2_0, uint8_t *);
+ data_1_1 = rte_pktmbuf_mtod(mbuf_2_1, uint8_t *);
+ APP_IO_RX_PREFETCH0(data_1_0);
+ APP_IO_RX_PREFETCH0(data_1_1);
+
+ mbuf_2_0 = lp->rx.mbuf_in.array[j+4];
+ mbuf_2_1 = lp->rx.mbuf_in.array[j+5];
+ APP_IO_RX_PREFETCH0(mbuf_2_0);
+ APP_IO_RX_PREFETCH0(mbuf_2_1);
+
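+ /*
+ * Load balancing decision: the packet byte at offset pos_lb is
+ * masked with (n_workers - 1) to select the destination worker,
+ * so traffic is spread evenly only when n_workers is a power of two.
+ */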
+ worker_0 = data_0_0[pos_lb] & (n_workers - 1);
+ worker_1 = data_0_1[pos_lb] & (n_workers - 1);
+
+ app_lcore_io_rx_buffer_to_send(lp, worker_0, mbuf_0_0, bsz_wr);
+ app_lcore_io_rx_buffer_to_send(lp, worker_1, mbuf_0_1, bsz_wr);
+ }
+
+ /* Handle the last 1, 2 (when n_mbufs is even) or 3 (when n_mbufs is odd) packets */
+ for ( ; j < n_mbufs; j += 1) {
+ struct rte_mbuf *mbuf;
+ uint8_t *data;
+ uint32_t worker;
+
+ mbuf = mbuf_1_0;
+ mbuf_1_0 = mbuf_1_1;
+ mbuf_1_1 = mbuf_2_0;
+ mbuf_2_0 = mbuf_2_1;
+
+ data = rte_pktmbuf_mtod(mbuf, uint8_t *);
+
+ APP_IO_RX_PREFETCH0(mbuf_1_0);
+
+ worker = data[pos_lb] & (n_workers - 1);
+
+ app_lcore_io_rx_buffer_to_send(lp, worker, mbuf, bsz_wr);
+ }
+ }
+}
+
+static inline void
+app_lcore_io_rx_flush(struct app_lcore_params_io *lp, uint32_t n_workers)
+{
+ uint32_t worker;
+
+ for (worker = 0; worker < n_workers; worker ++) {
+ int ret;
+
+ if (likely((lp->rx.mbuf_out_flush[worker] == 0) ||
+ (lp->rx.mbuf_out[worker].n_mbufs == 0))) {
+ lp->rx.mbuf_out_flush[worker] = 1;
+ continue;
+ }
+
+ ret = rte_ring_sp_enqueue_bulk(
+ lp->rx.rings[worker],
+ (void **) lp->rx.mbuf_out[worker].array,
+ lp->rx.mbuf_out[worker].n_mbufs);
+
+ if (unlikely(ret < 0)) {
+ uint32_t k;
+ for (k = 0; k < lp->rx.mbuf_out[worker].n_mbufs; k ++) {
+ struct rte_mbuf *pkt_to_free = lp->rx.mbuf_out[worker].array[k];
+ rte_pktmbuf_free(pkt_to_free);
+ }
+ }
+
+ lp->rx.mbuf_out[worker].n_mbufs = 0;
+ lp->rx.mbuf_out_flush[worker] = 1;
+ }
+}
+
+static inline void
+app_lcore_io_tx(
+ struct app_lcore_params_io *lp,
+ uint32_t n_workers,
+ uint32_t bsz_rd,
+ uint32_t bsz_wr)
+{
+ uint32_t worker;
+
+ for (worker = 0; worker < n_workers; worker ++) {
+ uint32_t i;
+
+ for (i = 0; i < lp->tx.n_nic_ports; i ++) {
+ uint8_t port = lp->tx.nic_ports[i];
+ struct rte_ring *ring = lp->tx.rings[port][worker];
+ uint32_t n_mbufs, n_pkts;
+ int ret;
+
+ n_mbufs = lp->tx.mbuf_out[port].n_mbufs;
+ ret = rte_ring_sc_dequeue_bulk(
+ ring,
+ (void **) &lp->tx.mbuf_out[port].array[n_mbufs],
+ bsz_rd);
+
+ if (unlikely(ret == -ENOENT)) {
+ continue;
+ }
+
+ n_mbufs += bsz_rd;
+
+#if APP_IO_TX_DROP_ALL_PACKETS
+ {
+ uint32_t j;
+ APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[0]);
+ APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[1]);
+
+ for (j = 0; j < n_mbufs; j ++) {
+ if (likely(j < n_mbufs - 2)) {
+ APP_IO_TX_PREFETCH0(lp->tx.mbuf_out[port].array[j + 2]);
+ }
+
+ rte_pktmbuf_free(lp->tx.mbuf_out[port].array[j]);
+ }
+
+ lp->tx.mbuf_out[port].n_mbufs = 0;
+
+ continue;
+ }
+#endif
+
+ if (unlikely(n_mbufs < bsz_wr)) {
+ lp->tx.mbuf_out[port].n_mbufs = n_mbufs;
+ continue;
+ }
+
+ n_pkts = rte_eth_tx_burst(
+ port,
+ 0,
+ lp->tx.mbuf_out[port].array,
+ (uint16_t) n_mbufs);
+
+#if APP_STATS
+ lp->tx.nic_ports_iters[port] ++;
+ lp->tx.nic_ports_count[port] += n_pkts;
+ if (unlikely(lp->tx.nic_ports_iters[port] == APP_STATS)) {
+ unsigned lcore = rte_lcore_id();
+
+ printf("\t\t\tI/O TX %u out (port %u): avg burst size = %.2f\n",
+ lcore,
+ (unsigned) port,
+ ((double) lp->tx.nic_ports_count[port]) / ((double) lp->tx.nic_ports_iters[port]));
+ lp->tx.nic_ports_iters[port] = 0;
+ lp->tx.nic_ports_count[port] = 0;
+ }
+#endif
+
+ if (unlikely(n_pkts < n_mbufs)) {
+ uint32_t k;
+ for (k = n_pkts; k < n_mbufs; k ++) {
+ struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
+ rte_pktmbuf_free(pkt_to_free);
+ }
+ }
+ lp->tx.mbuf_out[port].n_mbufs = 0;
+ lp->tx.mbuf_out_flush[port] = 0;
+ }
+ }
+}
+
+static inline void
+app_lcore_io_tx_flush(struct app_lcore_params_io *lp)
+{
+ uint8_t port;
+
+ for (port = 0; port < lp->tx.n_nic_ports; port ++) {
+ uint32_t n_pkts;
+
+ if (likely((lp->tx.mbuf_out_flush[port] == 0) ||
+ (lp->tx.mbuf_out[port].n_mbufs == 0))) {
+ lp->tx.mbuf_out_flush[port] = 1;
+ continue;
+ }
+
+ n_pkts = rte_eth_tx_burst(
+ port,
+ 0,
+ lp->tx.mbuf_out[port].array,
+ (uint16_t) lp->tx.mbuf_out[port].n_mbufs);
+
+ if (unlikely(n_pkts < lp->tx.mbuf_out[port].n_mbufs)) {
+ uint32_t k;
+ for (k = n_pkts; k < lp->tx.mbuf_out[port].n_mbufs; k ++) {
+ struct rte_mbuf *pkt_to_free = lp->tx.mbuf_out[port].array[k];
+ rte_pktmbuf_free(pkt_to_free);
+ }
+ }
+
+ lp->tx.mbuf_out[port].n_mbufs = 0;
+ lp->tx.mbuf_out_flush[port] = 1;
+ }
+}
+
+static void
+app_lcore_main_loop_io(void)
+{
+ uint32_t lcore = rte_lcore_id();
+ struct app_lcore_params_io *lp = &app.lcore_params[lcore].io;
+ uint32_t n_workers = app_get_lcores_worker();
+ uint64_t i = 0;
+
+ uint32_t bsz_rx_rd = app.burst_size_io_rx_read;
+ uint32_t bsz_rx_wr = app.burst_size_io_rx_write;
+ uint32_t bsz_tx_rd = app.burst_size_io_tx_read;
+ uint32_t bsz_tx_wr = app.burst_size_io_tx_write;
+
+ uint8_t pos_lb = app.pos_lb;
+
+ for ( ; ; ) {
+ if (APP_LCORE_IO_FLUSH && (unlikely(i == APP_LCORE_IO_FLUSH))) {
+ if (likely(lp->rx.n_nic_queues > 0)) {
+ app_lcore_io_rx_flush(lp, n_workers);
+ }
+
+ if (likely(lp->tx.n_nic_ports > 0)) {
+ app_lcore_io_tx_flush(lp);
+ }
+
+ i = 0;
+ }
+
+ if (likely(lp->rx.n_nic_queues > 0)) {
+ app_lcore_io_rx(lp, n_workers, bsz_rx_rd, bsz_rx_wr, pos_lb);
+ }
+
+ if (likely(lp->tx.n_nic_ports > 0)) {
+ app_lcore_io_tx(lp, n_workers, bsz_tx_rd, bsz_tx_wr);
+ }
+
+ i ++;
+ }
+}
+
+static inline void
+app_lcore_worker(
+ struct app_lcore_params_worker *lp,
+ uint32_t bsz_rd,
+ uint32_t bsz_wr)
+{
+ uint32_t i;
+
+ for (i = 0; i < lp->n_rings_in; i ++) {
+ struct rte_ring *ring_in = lp->rings_in[i];
+ uint32_t j;
+ int ret;
+
+ ret = rte_ring_sc_dequeue_bulk(
+ ring_in,
+ (void **) lp->mbuf_in.array,
+ bsz_rd);
+
+ if (unlikely(ret == -ENOENT)) {
+ continue;
+ }
+
+#if APP_WORKER_DROP_ALL_PACKETS
+ for (j = 0; j < bsz_rd; j ++) {
+ struct rte_mbuf *pkt = lp->mbuf_in.array[j];
+ rte_pktmbuf_free(pkt);
+ }
+
+ continue;
+#endif
+
+ APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[0], unsigned char *));
+ APP_WORKER_PREFETCH0(lp->mbuf_in.array[1]);
+
+ for (j = 0; j < bsz_rd; j ++) {
+ struct rte_mbuf *pkt;
+ struct ipv4_hdr *ipv4_hdr;
+ uint32_t ipv4_dst, pos;
+ uint32_t port;
+
+ if (likely(j < bsz_rd - 1)) {
+ APP_WORKER_PREFETCH1(rte_pktmbuf_mtod(lp->mbuf_in.array[j+1], unsigned char *));
+ }
+ if (likely(j < bsz_rd - 2)) {
+ APP_WORKER_PREFETCH0(lp->mbuf_in.array[j+2]);
+ }
+
+ pkt = lp->mbuf_in.array[j];
+ ipv4_hdr = rte_pktmbuf_mtod_offset(pkt,
+ struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_dst = rte_be_to_cpu_32(ipv4_hdr->dst_addr);
+
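+ /* Route on the IPv4 destination address; on LPM lookup miss, the
+ * packet is sent back out on the port it arrived on. */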
+ if (unlikely(rte_lpm_lookup(lp->lpm_table, ipv4_dst, &port) != 0)) {
+ port = pkt->port;
+ }
+
+ pos = lp->mbuf_out[port].n_mbufs;
+
+ lp->mbuf_out[port].array[pos ++] = pkt;
+ if (likely(pos < bsz_wr)) {
+ lp->mbuf_out[port].n_mbufs = pos;
+ continue;
+ }
+
+ ret = rte_ring_sp_enqueue_bulk(
+ lp->rings_out[port],
+ (void **) lp->mbuf_out[port].array,
+ bsz_wr);
+
+#if APP_STATS
+ lp->rings_out_iters[port] ++;
+ if (ret == 0) {
+ lp->rings_out_count[port] += 1;
+ }
+ if (lp->rings_out_iters[port] == APP_STATS) {
+ printf("\t\tWorker %u out (NIC port %u): enq success rate = %.2f\n",
+ (unsigned) lp->worker_id,
+ (unsigned) port,
+ ((double) lp->rings_out_count[port]) / ((double) lp->rings_out_iters[port]));
+ lp->rings_out_iters[port] = 0;
+ lp->rings_out_count[port] = 0;
+ }
+#endif
+
+ if (unlikely(ret == -ENOBUFS)) {
+ uint32_t k;
+ for (k = 0; k < bsz_wr; k ++) {
+ struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
+ rte_pktmbuf_free(pkt_to_free);
+ }
+ }
+
+ lp->mbuf_out[port].n_mbufs = 0;
+ lp->mbuf_out_flush[port] = 0;
+ }
+ }
+}
+
+static inline void
+app_lcore_worker_flush(struct app_lcore_params_worker *lp)
+{
+ uint32_t port;
+
+ for (port = 0; port < APP_MAX_NIC_PORTS; port ++) {
+ int ret;
+
+ if (unlikely(lp->rings_out[port] == NULL)) {
+ continue;
+ }
+
+ if (likely((lp->mbuf_out_flush[port] == 0) ||
+ (lp->mbuf_out[port].n_mbufs == 0))) {
+ lp->mbuf_out_flush[port] = 1;
+ continue;
+ }
+
+ ret = rte_ring_sp_enqueue_bulk(
+ lp->rings_out[port],
+ (void **) lp->mbuf_out[port].array,
+ lp->mbuf_out[port].n_mbufs);
+
+ if (unlikely(ret < 0)) {
+ uint32_t k;
+ for (k = 0; k < lp->mbuf_out[port].n_mbufs; k ++) {
+ struct rte_mbuf *pkt_to_free = lp->mbuf_out[port].array[k];
+ rte_pktmbuf_free(pkt_to_free);
+ }
+ }
+
+ lp->mbuf_out[port].n_mbufs = 0;
+ lp->mbuf_out_flush[port] = 1;
+ }
+}
+
+static void
+app_lcore_main_loop_worker(void)
+{
+ uint32_t lcore = rte_lcore_id();
+ struct app_lcore_params_worker *lp = &app.lcore_params[lcore].worker;
+ uint64_t i = 0;
+
+ uint32_t bsz_rd = app.burst_size_worker_read;
+ uint32_t bsz_wr = app.burst_size_worker_write;
+
+ for ( ; ; ) {
+ if (APP_LCORE_WORKER_FLUSH && (unlikely(i == APP_LCORE_WORKER_FLUSH))) {
+ app_lcore_worker_flush(lp);
+ i = 0;
+ }
+
+ app_lcore_worker(lp, bsz_rd, bsz_wr);
+
+ i ++;
+ }
+}
+
+int
+app_lcore_main_loop(__attribute__((unused)) void *arg)
+{
+ struct app_lcore_params *lp;
+ unsigned lcore;
+
+ lcore = rte_lcore_id();
+ lp = &app.lcore_params[lcore];
+
+ if (lp->type == e_APP_LCORE_IO) {
+ printf("Logical core %u (I/O) main loop.\n", lcore);
+ app_lcore_main_loop_io();
+ }
+
+ if (lp->type == e_APP_LCORE_WORKER) {
+ printf("Logical core %u (worker %u) main loop.\n",
+ lcore,
+ (unsigned) lp->worker.worker_id);
+ app_lcore_main_loop_worker();
+ }
+
+ return 0;
+}
diff --git a/examples/multi_process/Makefile b/examples/multi_process/Makefile
new file mode 100644
index 00000000..6b315cc0
--- /dev/null
+++ b/examples/multi_process/Makefile
@@ -0,0 +1,45 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += client_server_mp
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += simple_mp
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += symmetric_mp
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/multi_process/client_server_mp/Makefile b/examples/multi_process/client_server_mp/Makefile
new file mode 100644
index 00000000..89cc6bf8
--- /dev/null
+++ b/examples/multi_process/client_server_mp/Makefile
@@ -0,0 +1,44 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += mp_client
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += mp_server
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/multi_process/client_server_mp/mp_client/Makefile b/examples/multi_process/client_server_mp/mp_client/Makefile
new file mode 100644
index 00000000..2688fed0
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_client/Makefile
@@ -0,0 +1,48 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = mp_client
+
+# all source are stored in SRCS-y
+SRCS-y := client.c
+
+CFLAGS += $(WERROR_FLAGS) -O3
+CFLAGS += -I$(SRCDIR)/../shared
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/client_server_mp/mp_client/client.c b/examples/multi_process/client_server_mp/mp_client/client.c
new file mode 100644
index 00000000..d4f9ca37
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_client/client.c
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <getopt.h>
+#include <string.h>
+
+#include <rte_common.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_log.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_string_fns.h>
+
+#include "common.h"
+
+/* Number of packets to attempt to read from queue */
+#define PKT_READ_SIZE ((uint16_t)32)
+
+/* our client id number - tells us which RX queue to read from, and which
+ * NIC TX queue to write to. */
+static uint8_t client_id = 0;
+
+#define MBQ_CAPACITY 32
+
+/* maps input ports to output ports for packets */
+static uint8_t output_ports[RTE_MAX_ETHPORTS];
+
+/* buffers up a set of packets that are ready to send */
+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+/* shared data from server. We update statistics here */
+static volatile struct tx_stats *tx_stats;
+
+
+/*
+ * print a usage message
+ */
+static void
+usage(const char *progname)
+{
+ printf("Usage: %s [EAL args] -- -n <client_id>\n\n", progname);
+}
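+
+/*
+ * Illustrative invocation only (core mask and EAL options will vary with the
+ * deployment): once the mp_server primary process is running, a client can
+ * be started as a secondary DPDK process, e.g.
+ * ./mp_client -c 0x2 --proc-type=secondary -- -n 0
+ */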
+
+/*
+ * Convert the client id number from a string to an int.
+ */
+static int
+parse_client_num(const char *client)
+{
+ char *end = NULL;
+ unsigned long temp;
+
+ if (client == NULL || *client == '\0')
+ return -1;
+
+ temp = strtoul(client, &end, 10);
+ if (end == NULL || *end != '\0')
+ return -1;
+
+ client_id = (uint8_t)temp;
+ return 0;
+}
+
+/*
+ * Parse the application arguments to the client app.
+ */
+static int
+parse_app_args(int argc, char *argv[])
+{
+ int option_index, opt;
+ char **argvopt = argv;
+ const char *progname = NULL;
+ static struct option lgopts[] = { /* no long options */
+ {NULL, 0, 0, 0 }
+ };
+ progname = argv[0];
+
+ while ((opt = getopt_long(argc, argvopt, "n:", lgopts,
+ &option_index)) != EOF){
+ switch (opt){
+ case 'n':
+ if (parse_client_num(optarg) != 0){
+ usage(progname);
+ return -1;
+ }
+ break;
+ default:
+ usage(progname);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+/*
+ * Tx buffer error callback
+ */
+static void
+flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count,
+ void *userdata) {
+ int i;
+ uint8_t port_id = (uintptr_t)userdata;
+
+ tx_stats->tx_drop[port_id] += count;
+
+ /* free the mbufs which failed from transmit */
+ for (i = 0; i < count; i++)
+ rte_pktmbuf_free(unsent[i]);
+
+}
+
+static void
+configure_tx_buffer(uint8_t port_id, uint16_t size)
+{
+ int ret;
+
+ /* Initialize TX buffers */
+ tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(size), 0,
+ rte_eth_dev_socket_id(port_id));
+ if (tx_buffer[port_id] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) port_id);
+
+ rte_eth_tx_buffer_init(tx_buffer[port_id], size);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id],
+ flush_tx_error_callback, (void *)(intptr_t)port_id);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) port_id);
+}
+
+/*
+ * set up output ports so that all traffic arriving on a port gets sent out
+ * its paired port. Index using actual port numbers since that is
+ * what comes in the mbuf structure.
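+ * For example, with ports 0, 1, 2 and 3 in use, traffic arriving on port 0
+ * is transmitted on port 1 (and vice versa), and ports 2 and 3 are paired
+ * in the same way.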
+ */
+static void
+configure_output_ports(const struct port_info *ports)
+{
+ int i;
+ if (ports->num_ports > RTE_MAX_ETHPORTS)
+ rte_exit(EXIT_FAILURE, "Too many ethernet ports. RTE_MAX_ETHPORTS = %u\n",
+ (unsigned)RTE_MAX_ETHPORTS);
+ for (i = 0; i < ports->num_ports - 1; i+=2){
+ uint8_t p1 = ports->id[i];
+ uint8_t p2 = ports->id[i+1];
+ output_ports[p1] = p2;
+ output_ports[p2] = p1;
+
+ configure_tx_buffer(p1, MBQ_CAPACITY);
+ configure_tx_buffer(p2, MBQ_CAPACITY);
+
+ }
+}
+
+/*
+ * This function performs routing of packets
+ * Just sends each input packet out an output port based solely on the input
+ * port it arrived on.
+ */
+static void
+handle_packet(struct rte_mbuf *buf)
+{
+ int sent;
+ const uint8_t in_port = buf->port;
+ const uint8_t out_port = output_ports[in_port];
+ struct rte_eth_dev_tx_buffer *buffer = tx_buffer[out_port];
+
+ sent = rte_eth_tx_buffer(out_port, client_id, buffer, buf);
+ if (sent)
+ tx_stats->tx[out_port] += sent;
+
+}
+
+/*
+ * Application main function - loops through
+ * receiving and processing packets. Never returns
+ */
+int
+main(int argc, char *argv[])
+{
+ const struct rte_memzone *mz;
+ struct rte_ring *rx_ring;
+ struct rte_mempool *mp;
+ struct port_info *ports;
+ int need_flush = 0; /* indicates whether we have unsent packets */
+ int retval;
+ void *pkts[PKT_READ_SIZE];
+ uint16_t sent;
+
+ if ((retval = rte_eal_init(argc, argv)) < 0)
+ return -1;
+ argc -= retval;
+ argv += retval;
+
+ if (parse_app_args(argc, argv) < 0)
+ rte_exit(EXIT_FAILURE, "Invalid command-line arguments\n");
+
+ if (rte_eth_dev_count() == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ rx_ring = rte_ring_lookup(get_rx_queue_name(client_id));
+ if (rx_ring == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot get RX ring - is server process running?\n");
+
+ mp = rte_mempool_lookup(PKTMBUF_POOL_NAME);
+ if (mp == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot get mempool for mbufs\n");
+
+ mz = rte_memzone_lookup(MZ_PORT_INFO);
+ if (mz == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot get port info structure\n");
+ ports = mz->addr;
+ tx_stats = &(ports->tx_stats[client_id]);
+
+ configure_output_ports(ports);
+
+ RTE_LOG(INFO, APP, "Finished Process Init.\n");
+
+ printf("\nClient process %d handling packets\n", client_id);
+ printf("[Press Ctrl-C to quit ...]\n");
+
+ for (;;) {
+ uint16_t i, rx_pkts = PKT_READ_SIZE;
+ uint8_t port;
+
+ /* try dequeuing the maximum possible number of packets first; if that
+ * fails, dequeue as many as are currently available. The loop body
+ * should execute at most once. */
+ while (rx_pkts > 0 &&
+ unlikely(rte_ring_dequeue_bulk(rx_ring, pkts, rx_pkts) != 0))
+ rx_pkts = (uint16_t)RTE_MIN(rte_ring_count(rx_ring), PKT_READ_SIZE);
+
+ if (unlikely(rx_pkts == 0)){
+ if (need_flush)
+ for (port = 0; port < ports->num_ports; port++) {
+ sent = rte_eth_tx_buffer_flush(ports->id[port], client_id,
+ tx_buffer[port]);
+ if (unlikely(sent))
+ tx_stats->tx[port] += sent;
+ }
+ need_flush = 0;
+ continue;
+ }
+
+ for (i = 0; i < rx_pkts; i++)
+ handle_packet(pkts[i]);
+
+ need_flush = 1;
+ }
+}
diff --git a/examples/multi_process/client_server_mp/mp_server/Makefile b/examples/multi_process/client_server_mp/mp_server/Makefile
new file mode 100644
index 00000000..c29e4783
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/Makefile
@@ -0,0 +1,61 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# binary name
+APP = mp_server
+
+# all source are stored in SRCS-y
+SRCS-y := main.c init.c args.c
+
+INC := $(wildcard *.h)
+
+CFLAGS += $(WERROR_FLAGS) -O3
+CFLAGS += -I$(SRCDIR)/../shared
+
+# for newer gcc, e.g. 4.4, no-strict-aliasing may not be necessary
+# and so the next line can be removed in those cases.
+EXTRA_CFLAGS += -fno-strict-aliasing
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/client_server_mp/mp_server/args.c b/examples/multi_process/client_server_mp/mp_server/args.c
new file mode 100644
index 00000000..bf8c666c
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/args.c
@@ -0,0 +1,172 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <errno.h>
+
+#include <rte_memory.h>
+#include <rte_string_fns.h>
+
+#include "common.h"
+#include "args.h"
+#include "init.h"
+
+/* global var for number of clients - extern in header */
+uint8_t num_clients;
+
+static const char *progname;
+
+/**
+ * Prints out usage information to stdout
+ */
+static void
+usage(void)
+{
+ printf(
+ "%s [EAL options] -- -p PORTMASK -n NUM_CLIENTS [-s NUM_SOCKETS]\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to use\n"
+ " -n NUM_CLIENTS: number of client processes to use\n"
+ , progname);
+}
+
+/**
+ * The ports to be used by the application are passed in
+ * the form of a bitmask. This function parses the bitmask
+ * and places the port numbers to be used into the port[]
+ * array variable
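+ * (for example, a portmask of 0x5 selects ports 0 and 2)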
+ */
+static int
+parse_portmask(uint8_t max_ports, const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+ uint8_t count = 0;
+
+ if (portmask == NULL || *portmask == '\0')
+ return -1;
+
+ /* convert parameter to a number and verify */
+ pm = strtoul(portmask, &end, 16);
+ if (end == NULL || *end != '\0' || pm == 0)
+ return -1;
+
+ /* loop through bits of the mask and mark ports */
+ while (pm != 0){
+ if (pm & 0x01){ /* bit is set in mask, use port */
+ if (count >= max_ports)
+ printf("WARNING: requested port %u not present"
+ " - ignoring\n", (unsigned)count);
+ else
+ ports->id[ports->num_ports++] = count;
+ }
+ pm = (pm >> 1);
+ count++;
+ }
+
+ return 0;
+}
+
+/**
+ * Take the number of clients parameter passed to the app
+ * and convert to a number to store in the num_clients variable
+ */
+static int
+parse_num_clients(const char *clients)
+{
+ char *end = NULL;
+ unsigned long temp;
+
+ if (clients == NULL || *clients == '\0')
+ return -1;
+
+ temp = strtoul(clients, &end, 10);
+ if (end == NULL || *end != '\0' || temp == 0)
+ return -1;
+
+ num_clients = (uint8_t)temp;
+ return 0;
+}
+
+/**
+ * The application specific arguments follow the DPDK-specific
+ * arguments which are stripped by the DPDK init. This function
+ * processes these application arguments, printing usage info
+ * on error.
+ */
+int
+parse_app_args(uint8_t max_ports, int argc, char *argv[])
+{
+ int option_index, opt;
+ char **argvopt = argv;
+ static struct option lgopts[] = { /* no long options */
+ {NULL, 0, 0, 0 }
+ };
+ progname = argv[0];
+
+ while ((opt = getopt_long(argc, argvopt, "n:p:", lgopts,
+ &option_index)) != EOF){
+ switch (opt){
+ case 'p':
+ if (parse_portmask(max_ports, optarg) != 0){
+ usage();
+ return -1;
+ }
+ break;
+ case 'n':
+ if (parse_num_clients(optarg) != 0){
+ usage();
+ return -1;
+ }
+ break;
+ default:
+ printf("ERROR: Unknown option '%c'\n", opt);
+ usage();
+ return -1;
+ }
+ }
+
+ if (ports->num_ports == 0 || num_clients == 0){
+ usage();
+ return -1;
+ }
+
+ if (ports->num_ports % 2 != 0){
+ printf("ERROR: application requires an even number of ports to use\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/examples/multi_process/client_server_mp/mp_server/args.h b/examples/multi_process/client_server_mp/mp_server/args.h
new file mode 100644
index 00000000..23af1bd3
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/args.h
@@ -0,0 +1,39 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ARGS_H_
+#define _ARGS_H_
+
+int parse_app_args(uint8_t max_ports, int argc, char *argv[]);
+
+#endif /* ifndef _ARGS_H_ */
diff --git a/examples/multi_process/client_server_mp/mp_server/init.c b/examples/multi_process/client_server_mp/mp_server/init.c
new file mode 100644
index 00000000..ecb61c68
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/init.c
@@ -0,0 +1,305 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <stdarg.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_atomic.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_debug.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_memcpy.h>
+#include <rte_mbuf.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_malloc.h>
+#include <rte_fbk_hash.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+
+#include "common.h"
+#include "args.h"
+#include "init.h"
+
+#define MBUFS_PER_CLIENT 1536
+#define MBUFS_PER_PORT 1536
+#define MBUF_CACHE_SIZE 512
+
+#define RTE_MP_RX_DESC_DEFAULT 512
+#define RTE_MP_TX_DESC_DEFAULT 512
+#define CLIENT_QUEUE_RINGSIZE 128
+
+#define NO_FLAGS 0
+
+/* The mbuf pool for packet rx */
+struct rte_mempool *pktmbuf_pool;
+
+/* array of info/queues for clients */
+struct client *clients = NULL;
+
+/* the port details */
+struct port_info *ports;
+
+/**
+ * Initialise the mbuf pool for packet reception for the NIC, and any other
+ * buffer pools needed by the app - currently none.
+ */
+static int
+init_mbuf_pools(void)
+{
+ const unsigned num_mbufs = (num_clients * MBUFS_PER_CLIENT) \
+ + (ports->num_ports * MBUFS_PER_PORT);
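+ /* e.g. 2 clients and 2 ports -> (2 * 1536) + (2 * 1536) = 6144 mbufs */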
+
+ /* don't pass single-producer/single-consumer flags to mbuf create as it
+ * seems faster to use a cache instead */
+ printf("Creating mbuf pool '%s' [%u mbufs] ...\n",
+ PKTMBUF_POOL_NAME, num_mbufs);
+ pktmbuf_pool = rte_pktmbuf_pool_create(PKTMBUF_POOL_NAME, num_mbufs,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+ return pktmbuf_pool == NULL; /* 0 on success */
+}
+
+/**
+ * Initialise an individual port:
+ * - configure number of rx and tx rings
+ * - set up each rx ring, to pull from the main mbuf pool
+ * - set up each tx ring
+ * - start the port and report its status to stdout
+ */
+static int
+init_port(uint8_t port_num)
+{
+ /* for port configuration all features are off by default */
+ const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS
+ }
+ };
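+ /* One NIC TX queue is created per client: each client transmits on the
+ * queue matching its own client id, so clients never share a TX queue. */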
+ const uint16_t rx_rings = 1, tx_rings = num_clients;
+ const uint16_t rx_ring_size = RTE_MP_RX_DESC_DEFAULT;
+ const uint16_t tx_ring_size = RTE_MP_TX_DESC_DEFAULT;
+
+ uint16_t q;
+ int retval;
+
+ printf("Port %u init ... ", (unsigned)port_num);
+ fflush(stdout);
+
+ /* Standard DPDK port initialisation - config port, then set up
+ * rx and tx rings */
+ if ((retval = rte_eth_dev_configure(port_num, rx_rings, tx_rings,
+ &port_conf)) != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port_num, q, rx_ring_size,
+ rte_eth_dev_socket_id(port_num),
+ NULL, pktmbuf_pool);
+ if (retval < 0) return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port_num, q, tx_ring_size,
+ rte_eth_dev_socket_id(port_num),
+ NULL);
+ if (retval < 0) return retval;
+ }
+
+ rte_eth_promiscuous_enable(port_num);
+
+ retval = rte_eth_dev_start(port_num);
+ if (retval < 0) return retval;
+
+ printf( "done: \n");
+
+ return 0;
+}
+
+/**
+ * Set up the DPDK rings which will be used to pass packets, via
+ * pointers, between the multi-process server and client processes.
+ * Each client needs one RX queue.
+ */
+static int
+init_shm_rings(void)
+{
+ unsigned i;
+ unsigned socket_id;
+ const char * q_name;
+ const unsigned ringsize = CLIENT_QUEUE_RINGSIZE;
+
+ clients = rte_malloc("client details",
+ sizeof(*clients) * num_clients, 0);
+ if (clients == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate memory for client program details\n");
+
+ for (i = 0; i < num_clients; i++) {
+ /* Create an RX queue for each client */
+ socket_id = rte_socket_id();
+ q_name = get_rx_queue_name(i);
+ clients[i].rx_q = rte_ring_create(q_name,
+ ringsize, socket_id,
+ RING_F_SP_ENQ | RING_F_SC_DEQ ); /* single prod, single cons */
+ if (clients[i].rx_q == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create rx ring queue for client %u\n", i);
+ }
+ return 0;
+}
+
+/* Check the link status of all ports, waiting up to 9s, and finally print the status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << ports->id[portid])) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(ports->id[portid], &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", ports->id[portid],
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)ports->id[portid]);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/**
+ * Main init function for the multi-process server app,
+ * calls subfunctions to do each stage of the initialisation.
+ */
+int
+init(int argc, char *argv[])
+{
+ int retval;
+ const struct rte_memzone *mz;
+ uint8_t i, total_ports;
+
+ /* init EAL, parsing EAL args */
+ retval = rte_eal_init(argc, argv);
+ if (retval < 0)
+ return -1;
+ argc -= retval;
+ argv += retval;
+
+ /* get total number of ports */
+ total_ports = rte_eth_dev_count();
+
+ /* set up array for port data */
+ mz = rte_memzone_reserve(MZ_PORT_INFO, sizeof(*ports),
+ rte_socket_id(), NO_FLAGS);
+ if (mz == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot reserve memory zone for port information\n");
+ memset(mz->addr, 0, sizeof(*ports));
+ ports = mz->addr;
+
+ /* parse additional, application arguments */
+ retval = parse_app_args(total_ports, argc, argv);
+ if (retval != 0)
+ return -1;
+
+ /* initialise mbuf pools */
+ retval = init_mbuf_pools();
+ if (retval != 0)
+ rte_exit(EXIT_FAILURE, "Cannot create needed mbuf pools\n");
+
+ /* now initialise the ports we will use */
+ for (i = 0; i < ports->num_ports; i++) {
+ retval = init_port(ports->id[i]);
+ if (retval != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialise port %u\n",
+ (unsigned)i);
+ }
+
+ check_all_ports_link_status(ports->num_ports, (~0x0));
+
+ /* initialise the client queues/rings for inter-process comms */
+ init_shm_rings();
+
+ return 0;
+}
diff --git a/examples/multi_process/client_server_mp/mp_server/init.h b/examples/multi_process/client_server_mp/mp_server/init.h
new file mode 100644
index 00000000..7333614d
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/init.h
@@ -0,0 +1,72 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _INIT_H_
+#define _INIT_H_
+
+/*
+ * #include <rte_ring.h>
+ * #include "args.h"
+ */
+
+/*
+ * Define a client structure with all needed info, including
+ * stats from the clients.
+ */
+struct client {
+ struct rte_ring *rx_q;
+ unsigned client_id;
+ /* these stats hold how many packets the client will actually receive,
+ * and how many packets were dropped because the client's queue was full.
+ * The port-info stats, in contrast, record how many packets were received
+ * or transmitted on an actual NIC port.
+ */
+ struct {
+ volatile uint64_t rx;
+ volatile uint64_t rx_drop;
+ } stats;
+};
+
+extern struct client *clients;
+
+/* the shared port information: port numbers, rx and tx stats etc. */
+extern struct port_info *ports;
+
+extern struct rte_mempool *pktmbuf_pool;
+extern uint8_t num_clients;
+extern unsigned num_sockets;
+extern struct port_info *ports;
+
+int init(int argc, char *argv[]);
+
+#endif /* ifndef _INIT_H_ */
diff --git a/examples/multi_process/client_server_mp/mp_server/main.c b/examples/multi_process/client_server_mp/mp_server/main.c
new file mode 100644
index 00000000..de54c674
--- /dev/null
+++ b/examples/multi_process/client_server_mp/mp_server/main.c
@@ -0,0 +1,319 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <sys/queue.h>
+#include <errno.h>
+#include <netinet/ip.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_byteorder.h>
+#include <rte_launch.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_atomic.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+#include <rte_mempool.h>
+#include <rte_memcpy.h>
+#include <rte_mbuf.h>
+#include <rte_ether.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ethdev.h>
+#include <rte_byteorder.h>
+#include <rte_malloc.h>
+#include <rte_fbk_hash.h>
+#include <rte_string_fns.h>
+
+#include "common.h"
+#include "args.h"
+#include "init.h"
+
+/*
+ * When doing reads from the NIC or the client queues,
+ * use this batch size
+ */
+#define PACKET_READ_SIZE 32
+
+/*
+ * Local buffers to put packets in, used to send packets in bursts to the
+ * clients
+ */
+struct client_rx_buf {
+ struct rte_mbuf *buffer[PACKET_READ_SIZE];
+ uint16_t count;
+};
+
+/* One buffer per client rx queue - dynamically allocate array */
+static struct client_rx_buf *cl_rx_buf;
+
+static const char *
+get_printable_mac_addr(uint8_t port)
+{
+ static const char err_address[] = "00:00:00:00:00:00";
+ static char addresses[RTE_MAX_ETHPORTS][sizeof(err_address)];
+
+ if (unlikely(port >= RTE_MAX_ETHPORTS))
+ return err_address;
+ if (unlikely(addresses[port][0]=='\0')){
+ struct ether_addr mac;
+ rte_eth_macaddr_get(port, &mac);
+ snprintf(addresses[port], sizeof(addresses[port]),
+ "%02x:%02x:%02x:%02x:%02x:%02x",
+ mac.addr_bytes[0], mac.addr_bytes[1], mac.addr_bytes[2],
+ mac.addr_bytes[3], mac.addr_bytes[4], mac.addr_bytes[5]);
+ }
+ return addresses[port];
+}
+
+/*
+ * This function displays the recorded statistics for each port
+ * and for each client. It uses ANSI terminal codes to clear
+ * screen when called. It is called from a single non-master
+ * thread in the server process, when the process is run with more
+ * than one lcore enabled.
+ */
+static void
+do_stats_display(void)
+{
+ unsigned i, j;
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+ uint64_t port_tx[RTE_MAX_ETHPORTS], port_tx_drop[RTE_MAX_ETHPORTS];
+ uint64_t client_tx[MAX_CLIENTS], client_tx_drop[MAX_CLIENTS];
+
+ /* to get TX stats, we need to do some summing calculations */
+ memset(port_tx, 0, sizeof(port_tx));
+ memset(port_tx_drop, 0, sizeof(port_tx_drop));
+ memset(client_tx, 0, sizeof(client_tx));
+ memset(client_tx_drop, 0, sizeof(client_tx_drop));
+
+ for (i = 0; i < num_clients; i++){
+ const volatile struct tx_stats *tx = &ports->tx_stats[i];
+ for (j = 0; j < ports->num_ports; j++){
+ /* assign to local variables here, save re-reading volatile vars */
+ const uint64_t tx_val = tx->tx[ports->id[j]];
+ const uint64_t drop_val = tx->tx_drop[ports->id[j]];
+ port_tx[j] += tx_val;
+ port_tx_drop[j] += drop_val;
+ client_tx[i] += tx_val;
+ client_tx_drop[i] += drop_val;
+ }
+ }
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("PORTS\n");
+ printf("-----\n");
+ for (i = 0; i < ports->num_ports; i++)
+ printf("Port %u: '%s'\t", (unsigned)ports->id[i],
+ get_printable_mac_addr(ports->id[i]));
+ printf("\n\n");
+ for (i = 0; i < ports->num_ports; i++){
+ printf("Port %u - rx: %9"PRIu64"\t"
+ "tx: %9"PRIu64"\n",
+ (unsigned)ports->id[i], ports->rx_stats.rx[i],
+ port_tx[i]);
+ }
+
+ printf("\nCLIENTS\n");
+ printf("-------\n");
+ for (i = 0; i < num_clients; i++){
+ const unsigned long long rx = clients[i].stats.rx;
+ const unsigned long long rx_drop = clients[i].stats.rx_drop;
+ printf("Client %2u - rx: %9llu, rx_drop: %9llu\n"
+ " tx: %9"PRIu64", tx_drop: %9"PRIu64"\n",
+ i, rx, rx_drop, client_tx[i], client_tx_drop[i]);
+ }
+
+ printf("\n");
+}
+
+/*
+ * The function called from each non-master lcore used by the process.
+ * The test_and_set function is used to randomly pick a single lcore on which
+ * the code to display the statistics will run. Otherwise, the code just
+ * repeatedly sleeps.
+ */
+static int
+sleep_lcore(__attribute__((unused)) void *dummy)
+{
+ /* Used to pick a display thread - static, so zero-initialised */
+ static rte_atomic32_t display_stats;
+
+ /* Only one core should display stats */
+ if (rte_atomic32_test_and_set(&display_stats)) {
+ const unsigned sleeptime = 1;
+ printf("Core %u displaying statistics\n", rte_lcore_id());
+
+ /* Longer initial pause so above printf is seen */
+ sleep(sleeptime * 3);
+
+ /* Loop forever: sleep always returns 0 or <= param */
+ while (sleep(sleeptime) <= sleeptime)
+ do_stats_display();
+ }
+ return 0;
+}
+
+/*
+ * Function to set all the client statistic values to zero.
+ * Called at program startup.
+ */
+static void
+clear_stats(void)
+{
+ unsigned i;
+
+ for (i = 0; i < num_clients; i++)
+ clients[i].stats.rx = clients[i].stats.rx_drop = 0;
+}
+
+/*
+ * send a burst of traffic to a client, assuming there are packets
+ * available to be sent to this client
+ */
+static void
+flush_rx_queue(uint16_t client)
+{
+ uint16_t j;
+ struct client *cl;
+
+ if (cl_rx_buf[client].count == 0)
+ return;
+
+ cl = &clients[client];
+ if (rte_ring_enqueue_bulk(cl->rx_q, (void **)cl_rx_buf[client].buffer,
+ cl_rx_buf[client].count) != 0){
+ for (j = 0; j < cl_rx_buf[client].count; j++)
+ rte_pktmbuf_free(cl_rx_buf[client].buffer[j]);
+ cl->stats.rx_drop += cl_rx_buf[client].count;
+ }
+ else
+ cl->stats.rx += cl_rx_buf[client].count;
+
+ cl_rx_buf[client].count = 0;
+}
+
+/*
+ * marks a packet down to be sent to a particular client process
+ */
+static inline void
+enqueue_rx_packet(uint8_t client, struct rte_mbuf *buf)
+{
+ cl_rx_buf[client].buffer[cl_rx_buf[client].count++] = buf;
+}
+
+/*
+ * This function takes a group of packets and routes them
+ * individually to the client process. Very simply round-robins the packets
+ * without checking any of the packet contents.
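+ * (e.g. with two clients, packets are assigned to client 0, 1, 0, 1, ... in
+ * arrival order)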
+ */
+static void
+process_packets(uint32_t port_num __rte_unused,
+ struct rte_mbuf *pkts[], uint16_t rx_count)
+{
+ uint16_t i;
+ uint8_t client = 0;
+
+ for (i = 0; i < rx_count; i++) {
+ enqueue_rx_packet(client, pkts[i]);
+
+ if (++client == num_clients)
+ client = 0;
+ }
+
+ for (i = 0; i < num_clients; i++)
+ flush_rx_queue(i);
+}
+
+/*
+ * Function called by the master lcore of the DPDK process.
+ */
+static void
+do_packet_forwarding(void)
+{
+ unsigned port_num = 0; /* indexes the port[] array */
+
+ for (;;) {
+ struct rte_mbuf *buf[PACKET_READ_SIZE];
+ uint16_t rx_count;
+
+ /* read a port */
+ rx_count = rte_eth_rx_burst(ports->id[port_num], 0, \
+ buf, PACKET_READ_SIZE);
+ ports->rx_stats.rx[port_num] += rx_count;
+
+ /* Now process the NIC packets read */
+ if (likely(rx_count > 0))
+ process_packets(port_num, buf, rx_count);
+
+ /* move to next port */
+ if (++port_num == ports->num_ports)
+ port_num = 0;
+ }
+}
+
+int
+main(int argc, char *argv[])
+{
+ /* initialise the system */
+ if (init(argc, argv) < 0 )
+ return -1;
+ RTE_LOG(INFO, APP, "Finished Process Init.\n");
+
+ cl_rx_buf = calloc(num_clients, sizeof(cl_rx_buf[0]));
+
+ /* clear statistics */
+ clear_stats();
+
+ /* put all other cores to sleep bar master */
+ rte_eal_mp_remote_launch(sleep_lcore, NULL, SKIP_MASTER);
+
+ do_packet_forwarding();
+ return 0;
+}
diff --git a/examples/multi_process/client_server_mp/shared/common.h b/examples/multi_process/client_server_mp/shared/common.h
new file mode 100644
index 00000000..631c4632
--- /dev/null
+++ b/examples/multi_process/client_server_mp/shared/common.h
@@ -0,0 +1,87 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _COMMON_H_
+#define _COMMON_H_
+
+#define MAX_CLIENTS 16
+
+/*
+ * Shared port info, including statistics information for display by server.
+ * Structure will be put in a memzone.
+ * - All port id values share one cache line as this data will be read-only
+ * during operation.
+ * - All rx statistic values share cache lines, as this data is written only
+ * by the server process. (rare reads by stats display)
+ * - The tx statistics have values for all ports per cache line, but the stats
+ * themselves are written by the clients, so we have a distinct set, on different
+ * cache lines for each client to use.
+ */
+struct rx_stats{
+ uint64_t rx[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+
+struct tx_stats{
+ uint64_t tx[RTE_MAX_ETHPORTS];
+ uint64_t tx_drop[RTE_MAX_ETHPORTS];
+} __rte_cache_aligned;
+
+struct port_info {
+ uint8_t num_ports;
+ uint8_t id[RTE_MAX_ETHPORTS];
+ volatile struct rx_stats rx_stats;
+ volatile struct tx_stats tx_stats[MAX_CLIENTS];
+};
+
+/* define common names for structures shared between server and client */
+#define MP_CLIENT_RXQ_NAME "MProc_Client_%u_RX"
+#define PKTMBUF_POOL_NAME "MProc_pktmbuf_pool"
+#define MZ_PORT_INFO "MProc_port_info"
+
+/*
+ * Given the rx queue name template above, get the queue name
+ */
+static inline const char *
+get_rx_queue_name(unsigned id)
+{
+ /* buffer for return value. Size calculated by %u being replaced
+ * by maximum 3 digits (plus an extra byte for safety) */
+ static char buffer[sizeof(MP_CLIENT_RXQ_NAME) + 2];
+
+ snprintf(buffer, sizeof(buffer) - 1, MP_CLIENT_RXQ_NAME, id);
+ return buffer;
+}
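+
+/* For example, get_rx_queue_name(0) yields "MProc_Client_0_RX"; the server
+ * creates the ring under this name and client 0 looks it up at startup. */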
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+#endif
diff --git a/examples/multi_process/l2fwd_fork/Makefile b/examples/multi_process/l2fwd_fork/Makefile
new file mode 100644
index 00000000..ff257a35
--- /dev/null
+++ b/examples/multi_process/l2fwd_fork/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l2fwd_fork
+
+# all source are stored in SRCS-y
+SRCS-y := main.c flib.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/l2fwd_fork/flib.c b/examples/multi_process/l2fwd_fork/flib.c
new file mode 100644
index 00000000..343f09f1
--- /dev/null
+++ b/examples/multi_process/l2fwd_fork/flib.c
@@ -0,0 +1,313 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <sys/wait.h>
+#include <sys/prctl.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+#include <dirent.h>
+#include <signal.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+
+#include "flib.h"
+
+#define SIG_PARENT_EXIT SIGUSR1
+
+struct lcore_stat {
+ pid_t pid; /**< process id of the spawned slave */
+ lcore_function_t *f; /**< function to call */
+ void *arg; /**< argument of function */
+ slave_exit_notify *cb_fn;
+} __rte_cache_aligned;
+
+
+static struct lcore_stat *core_cfg;
+static uint16_t *lcore_cfg = NULL;
+
+/* signal handler to be notified after parent leaves */
+static void
+sighand_parent_exit(int sig)
+{
+ printf("lcore = %u : detected that the parent has exited, sig=%d\n", rte_lcore_id(),
+ sig);
+ printf("Child leaving\n");
+ exit(0);
+
+ return;
+}
+
+/**
+ * Real entry point, run in the slave process
+ **/
+static int
+slave_proc_func(void)
+{
+ struct rte_config *config;
+ unsigned slave_id = rte_lcore_id();
+ struct lcore_stat *cfg = &core_cfg[slave_id];
+
+ if (prctl(PR_SET_PDEATHSIG, SIG_PARENT_EXIT, 0, 0, 0, 0) != 0)
+ printf("Warning: Slave can't register to be notified in "
+ "case the master process exits\n");
+ else {
+ struct sigaction act;
+ memset(&act, 0 , sizeof(act));
+ act.sa_handler = sighand_parent_exit;
+ if (sigaction(SIG_PARENT_EXIT, &act, NULL) != 0)
+ printf("Fail to register signal handler:%d\n", SIG_PARENT_EXIT);
+ }
+
+ /* Set slave process to SECONDARY to avoid operation like dev_start/stop etc */
+ config = rte_eal_get_configuration();
+ if (NULL == config)
+ printf("Warning: Can't get rte_config\n");
+ else
+ config->process_type = RTE_PROC_SECONDARY;
+
+ printf("Core %u is ready (pid=%d)\n", slave_id, (int)cfg->pid);
+
+ exit(cfg->f(cfg->arg));
+}
+
+/**
+ * Entry point run in the master thread; it spawns the slave process and waits
+ * until that specific slave exits.
+ **/
+static int
+lcore_func(void *arg __attribute__((unused)))
+{
+ unsigned slave_id = rte_lcore_id();
+ struct lcore_stat *cfg = &core_cfg[slave_id];
+ int pid, stat;
+
+ if (rte_get_master_lcore() == slave_id)
+ return cfg->f(cfg->arg);
+
+ /* fork a slave process */
+ pid = fork();
+
+ if (pid == -1) {
+ printf("Failed to fork\n");
+ return -1;
+ } else if (pid == 0) /* child */
+ return slave_proc_func();
+ else { /* parent */
+ cfg->pid = pid;
+
+ waitpid(pid, &stat, 0);
+
+ cfg->pid = 0;
+ cfg->f = NULL;
+ cfg->arg = NULL;
+ /* Notify slave's exit if applicable */
+ if(cfg->cb_fn)
+ cfg->cb_fn(slave_id, stat);
+ return stat;
+ }
+}
+
+static int
+lcore_id_init(void)
+{
+ int i;
+ /* Setup lcore ID allocation map */
+ lcore_cfg = rte_zmalloc("LCORE_ID_MAP",
+ sizeof(uint16_t) * RTE_MAX_LCORE,
+ RTE_CACHE_LINE_SIZE);
+
+ if(lcore_cfg == NULL)
+ rte_panic("Failed to malloc\n");
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (rte_lcore_is_enabled(i))
+ lcore_cfg[i] = 1;
+ }
+ return 0;
+}
+
+int
+flib_assign_lcore_id(void)
+{
+ unsigned i;
+ int ret;
+
+ /**
+ * The thread was already assigned an lcore id, or it is a slave thread.
+ * Note there is still a bug here: if the core mask includes core 0 and that
+ * core calls this function, it can still get a new lcore id.
+ **/
+ if (rte_lcore_id() != 0)
+ return -1;
+
+ do {
+ /* Find an lcore id not used yet, avoiding lcore ID 0 */
+ for (i = 1; i < RTE_MAX_LCORE; i++) {
+ if (lcore_cfg[i] == 0)
+ break;
+ }
+ if (i == RTE_MAX_LCORE)
+ return -1;
+
+ /* Assign new lcore id to this thread */
+
+ ret = rte_atomic16_cmpset(&lcore_cfg[i], 0, 1);
+ } while (unlikely(ret == 0));
+
+ RTE_PER_LCORE(_lcore_id) = i;
+ return i;
+}
+
+void
+flib_free_lcore_id(unsigned lcore_id)
+{
+ /* id is not valid or belongs to pinned core id */
+ if (lcore_id >= RTE_MAX_LCORE || lcore_id == 0 ||
+ rte_lcore_is_enabled(lcore_id))
+ return;
+
+ lcore_cfg[lcore_id] = 0;
+}
+
+int
+flib_register_slave_exit_notify(unsigned slave_id,
+ slave_exit_notify *cb)
+{
+ if (cb == NULL)
+ return -EFAULT;
+
+ if (!rte_lcore_is_enabled(slave_id))
+ return -ENOENT;
+
+ core_cfg[slave_id].cb_fn = cb;
+
+ return 0;
+}
+
+enum slave_stat
+flib_query_slave_status(unsigned slave_id)
+{
+ if (!rte_lcore_is_enabled(slave_id))
+ return ST_FREEZE;
+ /* pid only be set when slave process spawned */
+ if (core_cfg[slave_id].pid != 0)
+ return ST_RUN;
+ else
+ return ST_IDLE;
+}
+
+int
+flib_remote_launch(lcore_function_t *f,
+ void *arg, unsigned slave_id)
+{
+ if (f == NULL)
+ return -1;
+
+ if (!rte_lcore_is_enabled(slave_id))
+ return -1;
+
+ /* Wait until specific lcore state change to WAIT */
+ rte_eal_wait_lcore(slave_id);
+
+ core_cfg[slave_id].f = f;
+ core_cfg[slave_id].arg = arg;
+
+ return rte_eal_remote_launch(lcore_func, NULL, slave_id);
+}
+
+int
+flib_mp_remote_launch(lcore_function_t *f, void *arg,
+ enum rte_rmt_call_master_t call_master)
+{
+ int i;
+
+ RTE_LCORE_FOREACH_SLAVE(i) {
+ core_cfg[i].arg = arg;
+ core_cfg[i].f = f;
+ }
+
+ return rte_eal_mp_remote_launch(lcore_func, NULL, call_master);
+}
+
+int
+flib_init(void)
+{
+ if ((core_cfg = rte_zmalloc("core_cfg",
+ sizeof(struct lcore_stat) * RTE_MAX_LCORE,
+ RTE_CACHE_LINE_SIZE)) == NULL ) {
+ printf("rte_zmalloc failed\n");
+ return -1;
+ }
+
+ if (lcore_id_init() != 0) {
+ printf("lcore_id_init failed\n");
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/multi_process/l2fwd_fork/flib.h b/examples/multi_process/l2fwd_fork/flib.h
new file mode 100644
index 00000000..711e3b6d
--- /dev/null
+++ b/examples/multi_process/l2fwd_fork/flib.h
@@ -0,0 +1,149 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __FLIB_H
+#define __FLIB_H
+
+/* callback function pointer when specific slave leaves */
+typedef void (slave_exit_notify)(unsigned slaveid, int stat);
+
+enum slave_stat{
+ ST_FREEZE = 1,
+ ST_IDLE,
+ ST_RUN,
+ ST_ZOMBIE, /* Not implemented yet */
+};
+
+/**
+ * Initialize the fork lib.
+ *
+ * @return
+ * - 0 : fork lib initialized successfully
+ * - -1 : fork lib initialization failed
+ */
+int flib_init(void);
+
+/**
+ * Check that every SLAVE lcore is in WAIT state, then call
+ * flib_remote_launch() for all of them. If call_master is set to
+ * CALL_MASTER, also call the function on the master lcore.
+ *
+ * @param f:
+ * function pointer to run
+ * @param arg:
+ * argument passed to f
+ * @param call_master
+ * - SKIP_MASTER : only launch the function on slave lcores
+ * - CALL_MASTER : launch the function on master and slave lcores
+ * @return
+ * - 0 : function executed successfully
+ * - -1 : function execution failed
+ */
+int flib_mp_remote_launch(lcore_function_t *f,
+ void *arg, enum rte_rmt_call_master_t call_master);
+
+/**
+ * Send a message to a slave lcore identified by slave_id to call a
+ * function f with argument arg.
+ *
+ * @param f:
+ * function pointer to run
+ * @param arg:
+ * argument passed to f
+ * @param slave_id
+ * slave lcore id to run on
+ * @return
+ * - 0 : function executed successfully
+ * - -1 : function execution failed
+ */
+int flib_remote_launch(lcore_function_t *f,
+ void *arg, unsigned slave_id);
+
+/**
+ * Query the running state of a specific slave; does not work with the master id.
+ *
+ * @param slave_id:
+ * lcore id which should not be master id
+ * @return
+ * - ST_FREEZE : lcore is not in enabled core mask
+ * - ST_IDLE : lcore is idle
+ * - ST_RUN : lcore is running something
+ */
+enum slave_stat
+flib_query_slave_status(unsigned slave_id);
+
+/**
+ * Register a callback function to be notified when a specific slave exits.
+ *
+ * @param slave_id:
+ * lcore id which should not be master id
+ * @param cb:
+ * callback pointer to register
+ * @return
+ * - 0 : function executed successfully
+ * - -EFAULT : argument error
+ * - -ENOENT : slave_id is not an enabled slave lcore
+ */
+int flib_register_slave_exit_notify(unsigned slave_id,
+ slave_exit_notify *cb);
+
+/**
+ * Assign an lcore ID to a non-slave thread. A non-slave thread is any thread
+ * not created by rte_eal_remote_launch() or rte_eal_mp_remote_launch().
+ * Such threads can either be bound to an lcore or float among different lcores.
+ * The assigned lcore ID is unique within a multi-thread or multi-process DPDK
+ * environment, so the thread can benefit from the per-lcore cache mechanism
+ * provided by the mempool library.
+ * After a successful call, use rte_lcore_id() to get the assigned lcore ID;
+ * other lcore functions are not guaranteed to work correctly for this thread.
+ *
+ * @return
+ * - -1 : an lcore id cannot be assigned, for one of three reasons:
+ * - the caller is not a non-slave thread.
+ * - the caller was already assigned an lcore id.
+ * - no free lcore ids are left.
+ * - > 0 : the assigned lcore id.
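+ *
+ * Typical usage in a floating (unpinned) worker thread, as done in
+ * l2fwd_fork/main.c: reset the per-lcore id to 0 (the allocator only accepts
+ * threads whose current lcore id is 0), call flib_assign_lcore_id(), use
+ * rte_lcore_id() as usual, and call flib_free_lcore_id() with the assigned id
+ * before the thread leaves.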
+ */
+int flib_assign_lcore_id(void);
+
+/**
+ * Free an lcore_id that was assigned by flib_assign_lcore_id().
+ * Call it when a non-slave thread is about to leave or has left.
+ *
+ * @param lcore_id
+ * The identifier of the lcore, which MUST be between 1 and
+ * RTE_MAX_LCORE-1.
+ */
+void flib_free_lcore_id(unsigned lcore_id);
+
+#endif /* __FLIB_H */
diff --git a/examples/multi_process/l2fwd_fork/main.c b/examples/multi_process/l2fwd_fork/main.c
new file mode 100644
index 00000000..2dc8b829
--- /dev/null
+++ b/examples/multi_process/l2fwd_fork/main.c
@@ -0,0 +1,1288 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdint.h>
+#include <sched.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/queue.h>
+#include <netinet/in.h>
+#include <setjmp.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_spinlock.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+
+#include "flib.h"
+
+#define RTE_LOGTYPE_L2FWD RTE_LOGTYPE_USER1
+#define MBUF_NAME "mbuf_pool_%d"
+#define MBUF_SIZE \
+(RTE_MBUF_DEFAULT_DATAROOM + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+#define NB_MBUF 8192
+#define RING_MASTER_NAME "l2fwd_ring_m2s_"
+#define RING_SLAVE_NAME "l2fwd_ring_s2m_"
+#define MAX_NAME_LEN 32
+/* RECREATE flag indicates that resources must be re-initialized and the slave core launched again */
+#define SLAVE_RECREATE_FLAG 0x1
+/* RESTART flag indicates that the port must be restarted and the START command sent again */
+#define SLAVE_RESTART_FLAG 0x2
+#define INVALID_MAPPING_ID ((unsigned)LCORE_ID_ANY)
+/* Maximum number of message buffers per slave */
+#define NB_CORE_MSGBUF 32
+enum l2fwd_cmd {
+ CMD_START,
+ CMD_STOP,
+};
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static struct ether_addr l2fwd_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* mask of enabled ports */
+static uint32_t l2fwd_enabled_port_mask = 0;
+
+/* list of enabled ports */
+static uint32_t l2fwd_dst_ports[RTE_MAX_ETHPORTS];
+
+static unsigned int l2fwd_rx_queue_per_lcore = 1;
+
+struct mbuf_table {
+ unsigned len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT 16
+struct lcore_queue_conf {
+ unsigned n_rx_port;
+ unsigned rx_port_list[MAX_RX_QUEUE_PER_LCORE];
+} __rte_cache_aligned;
+struct lcore_queue_conf lcore_queue_conf[RTE_MAX_LCORE];
+
+struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+struct lcore_resource_struct {
+ int enabled; /* Only set if this lcore is involved in packet forwarding */
+ int flags; /* Set only when the slave needs to restart or be recreated */
+ unsigned lcore_id; /* lcore ID */
+ unsigned pair_id; /* lcore ID handling the paired port (dependency) */
+ char ring_name[2][MAX_NAME_LEN];
+ /* ring[0]: master sends commands, slave reads them */
+ /* ring[1]: slave sends acks, master reads them */
+ struct rte_ring *ring[2];
+ int port_num; /* Number of ports handled by this lcore */
+ uint8_t port[RTE_MAX_ETHPORTS]; /* Port ids on which this lcore receives packets */
+}__attribute__((packed)) __rte_cache_aligned;
+
+static struct lcore_resource_struct lcore_resource[RTE_MAX_LCORE];
+static struct rte_mempool *message_pool;
+static rte_spinlock_t res_lock = RTE_SPINLOCK_INITIALIZER;
+/* use floating processes */
+static int float_proc = 0;
+/* Save original cpu affinity */
+struct cpu_aff_arg{
+ cpu_set_t set;
+ size_t size;
+}cpu_aff;
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool * l2fwd_pktmbuf_pool[RTE_MAX_ETHPORTS];
+
+/* Per-port statistics struct */
+struct l2fwd_port_statistics {
+ uint64_t tx;
+ uint64_t rx;
+ uint64_t dropped;
+} __rte_cache_aligned;
+struct l2fwd_port_statistics *port_statistics;
+/**
+ * Pointer to the lcore ID mapping array, used to return the lcore id when a
+ * slave process exits unexpectedly; only used when the floating process option
+ * is applied.
+ **/
+unsigned *mapping_id;
+
+/* A tsc-based timer responsible for triggering statistics printout */
+#define TIMER_MILLISECOND 2000000ULL /* around 1ms at 2 GHz */
+#define MAX_TIMER_PERIOD 86400 /* 1 day max */
+static int64_t timer_period = 10 * TIMER_MILLISECOND * 1000; /* default period is 10 seconds */
+
+static int l2fwd_launch_one_lcore(void *dummy);
+
+/* Print out statistics on packets dropped */
+static void
+print_stats(void)
+{
+ uint64_t total_packets_dropped, total_packets_tx, total_packets_rx;
+ unsigned portid;
+
+ total_packets_dropped = 0;
+ total_packets_tx = 0;
+ total_packets_rx = 0;
+
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char topLeft[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, topLeft);
+
+ printf("\nPort statistics ====================================");
+
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ /* skip disabled ports */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ printf("\nStatistics for port %u ------------------------------"
+ "\nPackets sent: %24"PRIu64
+ "\nPackets received: %20"PRIu64
+ "\nPackets dropped: %21"PRIu64,
+ portid,
+ port_statistics[portid].tx,
+ port_statistics[portid].rx,
+ port_statistics[portid].dropped);
+
+ total_packets_dropped += port_statistics[portid].dropped;
+ total_packets_tx += port_statistics[portid].tx;
+ total_packets_rx += port_statistics[portid].rx;
+ }
+ printf("\nAggregate statistics ==============================="
+ "\nTotal packets sent: %18"PRIu64
+ "\nTotal packets received: %14"PRIu64
+ "\nTotal packets dropped: %15"PRIu64,
+ total_packets_tx,
+ total_packets_rx,
+ total_packets_dropped);
+ printf("\n====================================================\n");
+}
+
+static int
+clear_cpu_affinity(void)
+{
+ int s;
+
+ s = sched_setaffinity(0, cpu_aff.size, &cpu_aff.set);
+ if (s != 0) {
+ printf("sched_setaffinity failed:%s\n", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int
+get_cpu_affinity(void)
+{
+ int s;
+
+ cpu_aff.size = sizeof(cpu_set_t);
+ CPU_ZERO(&cpu_aff.set);
+
+ s = sched_getaffinity(0, cpu_aff.size, &cpu_aff.set);
+ if (s != 0) {
+ printf("sched_getaffinity failed:%s\n", strerror(errno));
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * This function demonstrates how to create a ring in the first instance,
+ * or re-attach to an existing ring in a later instance.
+ **/
+static struct rte_ring *
+create_ring(const char *name, unsigned count,
+ int socket_id,unsigned flags)
+{
+ struct rte_ring *ring;
+
+ if (name == NULL)
+ return NULL;
+
+ /* If it was already created, just attach to it */
+ if (likely((ring = rte_ring_lookup(name)) != NULL))
+ return ring;
+
+ /* First call: create a new ring */
+ return rte_ring_create(name, count, socket_id, flags);
+}
+
+/* Allocate with rte_malloc the structures that are shared by master and slaves */
+static int
+l2fwd_malloc_shared_struct(void)
+{
+ port_statistics = rte_zmalloc("port_stat",
+ sizeof(struct l2fwd_port_statistics) * RTE_MAX_ETHPORTS,
+ 0);
+ if (port_statistics == NULL)
+ return -1;
+
+ /* allocate mapping_id array */
+ if (float_proc) {
+ int i;
+ mapping_id = rte_malloc("mapping_id", sizeof(unsigned) * RTE_MAX_LCORE,
+ 0);
+
+ if (mapping_id == NULL)
+ return -1;
+
+ for (i = 0 ;i < RTE_MAX_LCORE; i++)
+ mapping_id[i] = INVALID_MAPPING_ID;
+ }
+ return 0;
+}
+
+/* Create the rings used for communication between master and slave */
+static int
+create_ms_ring(unsigned slaveid)
+{
+ unsigned flag = RING_F_SP_ENQ | RING_F_SC_DEQ;
+ struct lcore_resource_struct *res = &lcore_resource[slaveid];
+ unsigned socketid = rte_socket_id();
+
+ /* Always create the rings on the master's socket id */
+ /* By default the rings hold only NB_CORE_MSGBUF (32) entries */
+ snprintf(res->ring_name[0], MAX_NAME_LEN, "%s%u",
+ RING_MASTER_NAME, slaveid);
+ if ((res->ring[0] = create_ring(res->ring_name[0], NB_CORE_MSGBUF,
+ socketid, flag)) == NULL) {
+ printf("Create m2s ring %s failed\n", res->ring_name[0]);
+ return -1;
+ }
+
+ snprintf(res->ring_name[1], MAX_NAME_LEN, "%s%u",
+ RING_SLAVE_NAME, slaveid);
+ if ((res->ring[1] = create_ring(res->ring_name[1], NB_CORE_MSGBUF,
+ socketid, flag)) == NULL) {
+ printf("Create s2m ring %s failed\n", res->ring_name[1]);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Send a command to the peer over the paired master/slave ring */
+static inline int
+sendcmd(unsigned slaveid, enum l2fwd_cmd cmd, int is_master)
+{
+ struct lcore_resource_struct *res = &lcore_resource[slaveid];
+ void *msg;
+ int fd = !is_master;
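+ /* Master enqueues on ring[0] (m2s); a slave enqueues its ack on ring[1] (s2m) */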
+
+ /* Only check on the master side; if the caller is a slave, the pair must already be enabled and running */
+ if (is_master && !res->enabled)
+ return -1;
+
+ if (res->ring[fd] == NULL)
+ return -1;
+
+ if (rte_mempool_get(message_pool, &msg) < 0) {
+ printf("Error to get message buffer\n");
+ return -1;
+ }
+
+ *(enum l2fwd_cmd *)msg = cmd;
+
+ if (rte_ring_enqueue(res->ring[fd], msg) != 0) {
+ printf("Enqueue error\n");
+ rte_mempool_put(message_pool, msg);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Get a command from the peer over the paired master/slave ring */
+static inline int
+getcmd(unsigned slaveid, enum l2fwd_cmd *cmd, int is_master)
+{
+ struct lcore_resource_struct *res = &lcore_resource[slaveid];
+ void *msg;
+ int fd = !!is_master;
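+ /* Master dequeues acks from ring[1] (s2m); a slave dequeues commands from ring[0] (m2s) */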
+ int ret;
+ /* Only check on the master side; if the caller is a slave, the pair must already be enabled and running */
+ if (is_master && (!res->enabled))
+ return -1;
+
+ if (res->ring[fd] == NULL)
+ return -1;
+
+ ret = rte_ring_dequeue(res->ring[fd], &msg);
+
+ if (ret == 0) {
+ *cmd = *(enum l2fwd_cmd *)msg;
+ rte_mempool_put(message_pool, msg);
+ }
+ return ret;
+}
+
+/* Master sends a command to a slave and waits until an ack is received or an error occurs */
+static int
+master_sendcmd_with_ack(unsigned slaveid, enum l2fwd_cmd cmd)
+{
+ enum l2fwd_cmd ack_cmd;
+ int ret = -1;
+
+ if (sendcmd(slaveid, cmd, 1) != 0)
+ rte_exit(EXIT_FAILURE, "Failed to send message\n");
+
+ /* Get ack */
+ while (1) {
+ ret = getcmd(slaveid, &ack_cmd, 1);
+ if (ret == 0 && cmd == ack_cmd)
+ break;
+
+ /* If slave not running yet, return an error */
+ if (flib_query_slave_status(slaveid) != ST_RUN) {
+ ret = -ENOENT;
+ break;
+ }
+ }
+
+ return ret;
+}
+
+/* Restart all ports assigned to that slave lcore */
+static int
+reset_slave_all_ports(unsigned slaveid)
+{
+ struct lcore_resource_struct *slave = &lcore_resource[slaveid];
+ int i, ret = 0;
+
+ /* stop/start port */
+ for (i = 0; i < slave->port_num; i++) {
+ char buf_name[RTE_MEMPOOL_NAMESIZE];
+ struct rte_mempool *pool;
+ printf("Stop port :%d\n", slave->port[i]);
+ rte_eth_dev_stop(slave->port[i]);
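+ /* Report how many mbufs are back in the port's pool after the stop
+ * (ideally all NB_MBUF), to spot buffers leaked by the dead slave */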
+ snprintf(buf_name, RTE_MEMPOOL_NAMESIZE, MBUF_NAME, slave->port[i]);
+ pool = rte_mempool_lookup(buf_name);
+ if (pool)
+ printf("Port %d mempool free object is %u(%u)\n", slave->port[i],
+ rte_mempool_count(pool), (unsigned)NB_MBUF);
+ else
+ printf("Can't find mempool %s\n", buf_name);
+
+ printf("Start port :%d\n", slave->port[i]);
+ ret = rte_eth_dev_start(slave->port[i]);
+ if (ret != 0)
+ break;
+ }
+ return ret;
+}
+
+static int
+reset_shared_structures(unsigned slaveid)
+{
+ int ret;
+ /* Ports are the only shared resources here */
+ ret = reset_slave_all_ports(slaveid);
+
+ return ret;
+}
+
+/**
+ * Call this function to re-create the resources needed by a slave process that
+ * exited in the previous instance.
+ **/
+static int
+init_slave_res(unsigned slaveid)
+{
+ struct lcore_resource_struct *slave = &lcore_resource[slaveid];
+ enum l2fwd_cmd cmd;
+
+ if (!slave->enabled) {
+ printf("Something wrong with lcore=%u enabled=%d\n",slaveid,
+ slave->enabled);
+ return -1;
+ }
+
+ /* Initialize ring */
+ if (create_ms_ring(slaveid) != 0)
+ rte_exit(EXIT_FAILURE, "failed to create ring for slave %u\n",
+ slaveid);
+
+ /* Drain any unread messages left in the rings */
+ while (getcmd(slaveid, &cmd, 1) == 0);
+ while (getcmd(slaveid, &cmd, 0) == 0);
+
+ return 0;
+}
+
+static int
+recreate_one_slave(unsigned slaveid)
+{
+ int ret = 0;
+ /* Re-initialize resource for stalled slave */
+ if ((ret = init_slave_res(slaveid)) != 0) {
+ printf("Init slave=%u failed\n", slaveid);
+ return ret;
+ }
+
+ if ((ret = flib_remote_launch(l2fwd_launch_one_lcore, NULL, slaveid))
+ != 0)
+ printf("Launch slave %u failed\n", slaveid);
+
+ return ret;
+}
+
+/**
+ * Remap the resources belonging to slave_id to the new lcore id obtained from
+ * flib_assign_lcore_id(); only used when the floating process option is applied.
+ *
+ * @param slaveid
+ * original lcore_id whose resources are remapped
+ * @param map_id
+ * new lcore id to which the resources are remapped
+ */
+static void
+remapping_slave_resource(unsigned slaveid, unsigned map_id)
+{
+
+ /* remapping lcore_resource */
+ memcpy(&lcore_resource[map_id], &lcore_resource[slaveid],
+ sizeof(struct lcore_resource_struct));
+
+ /* remapping lcore_queue_conf */
+ memcpy(&lcore_queue_conf[map_id], &lcore_queue_conf[slaveid],
+ sizeof(struct lcore_queue_conf));
+}
+
+static int
+reset_pair(unsigned slaveid, unsigned pairid)
+{
+ int ret;
+ if ((ret = reset_shared_structures(slaveid)) != 0)
+ goto back;
+
+ if((ret = reset_shared_structures(pairid)) != 0)
+ goto back;
+
+ if (float_proc) {
+ unsigned map_id = mapping_id[slaveid];
+
+ if (map_id != INVALID_MAPPING_ID) {
+ printf("%u return mapping id %u\n", slaveid, map_id);
+ flib_free_lcore_id(map_id);
+ mapping_id[slaveid] = INVALID_MAPPING_ID;
+ }
+
+ map_id = mapping_id[pairid];
+ if (map_id != INVALID_MAPPING_ID) {
+ printf("%u return mapping id %u\n", pairid, map_id);
+ flib_free_lcore_id(map_id);
+ mapping_id[pairid] = INVALID_MAPPING_ID;
+ }
+ }
+
+ if((ret = recreate_one_slave(slaveid)) != 0)
+ goto back;
+
+ ret = recreate_one_slave(pairid);
+
+back:
+ return ret;
+}
+
+static void
+slave_exit_cb(unsigned slaveid, __attribute__((unused))int stat)
+{
+ struct lcore_resource_struct *slave = &lcore_resource[slaveid];
+
+ printf("Get slave %u leave info\n", slaveid);
+ if (!slave->enabled) {
+ printf("Lcore=%u not registered for it's exit\n", slaveid);
+ return;
+ }
+ rte_spinlock_lock(&res_lock);
+
+ /* Change the state and wait for the master to restart it */
+ slave->flags = SLAVE_RECREATE_FLAG;
+
+ rte_spinlock_unlock(&res_lock);
+}
+
+static void
+l2fwd_simple_forward(struct rte_mbuf *m, unsigned portid)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+ unsigned dst_port;
+ int sent;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ dst_port = l2fwd_dst_ports[portid];
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&l2fwd_ports_eth_addr[dst_port], &eth->s_addr);
+
+ buffer = tx_buffer[dst_port];
+ sent = rte_eth_tx_buffer(dst_port, 0, buffer, m);
+ if (sent)
+ port_statistics[dst_port].tx += sent;
+}
+
+/* main processing loop */
+static void
+l2fwd_main_loop(void)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_mbuf *m;
+ int sent;
+ unsigned lcore_id;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ unsigned i, j, portid, nb_rx;
+ struct lcore_queue_conf *qconf;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S *
+ BURST_TX_DRAIN_US;
+ struct rte_eth_dev_tx_buffer *buffer;
+
+ prev_tsc = 0;
+
+ lcore_id = rte_lcore_id();
+
+ qconf = &lcore_queue_conf[lcore_id];
+
+ if (qconf->n_rx_port == 0) {
+ RTE_LOG(INFO, L2FWD, "lcore %u has nothing to do\n", lcore_id);
+ return;
+ }
+
+ RTE_LOG(INFO, L2FWD, "entering main loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+ portid = qconf->rx_port_list[i];
+ RTE_LOG(INFO, L2FWD, " -- lcoreid=%u portid=%u\n", lcore_id,
+ portid);
+ }
+
+ while (1) {
+ enum l2fwd_cmd cmd;
+ cur_tsc = rte_rdtsc();
+
+ if (unlikely(getcmd(lcore_id, &cmd, 0) == 0)) {
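+ /* Echo the command back to the master as an acknowledgement */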
+ sendcmd(lcore_id, cmd, 0);
+
+ /* If get stop command, stop forwarding and exit */
+ if (cmd == CMD_STOP) {
+ return;
+ }
+ }
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = l2fwd_dst_ports[qconf->rx_port_list[i]];
+ buffer = tx_buffer[portid];
+
+ sent = rte_eth_tx_buffer_flush(portid, 0, buffer);
+ if (sent)
+ port_statistics[portid].tx += sent;
+
+ }
+ }
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < qconf->n_rx_port; i++) {
+
+ portid = qconf->rx_port_list[i];
+ nb_rx = rte_eth_rx_burst((uint8_t) portid, 0,
+ pkts_burst, MAX_PKT_BURST);
+
+ port_statistics[portid].rx += nb_rx;
+
+ for (j = 0; j < nb_rx; j++) {
+ m = pkts_burst[j];
+ rte_prefetch0(rte_pktmbuf_mtod(m, void *));
+ l2fwd_simple_forward(m, portid);
+ }
+ }
+ }
+}
+
+static int
+l2fwd_launch_one_lcore(__attribute__((unused)) void *dummy)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ if (float_proc) {
+ unsigned flcore_id;
+
+ /* Change to a floating process and also change its lcore_id */
+ clear_cpu_affinity();
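+ /* flib_assign_lcore_id() only accepts threads whose current lcore id
+ * is 0, so reset the per-lcore id before requesting a new one */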
+ RTE_PER_LCORE(_lcore_id) = 0;
+ /* Get a lcore_id */
+ if (flib_assign_lcore_id() < 0 ) {
+ printf("flib_assign_lcore_id failed\n");
+ return -1;
+ }
+ flcore_id = rte_lcore_id();
+ /* Set mapping id, so master can return it after slave exited */
+ mapping_id[lcore_id] = flcore_id;
+ printf("Org lcore_id = %u, cur lcore_id = %u\n",
+ lcore_id, flcore_id);
+ remapping_slave_resource(lcore_id, flcore_id);
+ }
+
+ l2fwd_main_loop();
+
+ /* Give back the assigned lcore_id before returning */
+ if (float_proc) {
+ flib_free_lcore_id(rte_lcore_id());
+ mapping_id[lcore_id] = INVALID_MAPPING_ID;
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+l2fwd_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK -s COREMASK [-q NQ] -f\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+ " -q NQ: number of queue (=ports) per lcore (default is 1)\n"
+ " -f use floating process which won't bind to any core to run\n"
+ " -T PERIOD: statistics will be refreshed each PERIOD seconds (0 to disable, 10 default, 86400 maximum)\n",
+ prgname);
+}
+
+static int
+l2fwd_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static unsigned int
+l2fwd_parse_nqueue(const char *q_arg)
+{
+ char *end = NULL;
+ unsigned long n;
+
+ /* parse decimal string */
+ n = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return 0;
+ if (n == 0)
+ return 0;
+ if (n >= MAX_RX_QUEUE_PER_LCORE)
+ return 0;
+
+ return n;
+}
+
+static int
+l2fwd_parse_timer_period(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n >= MAX_TIMER_PERIOD)
+ return -1;
+
+ return n;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+l2fwd_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+ int has_pmask = 0;
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:q:T:f",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ l2fwd_enabled_port_mask = l2fwd_parse_portmask(optarg);
+ if (l2fwd_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ has_pmask = 1;
+ break;
+
+ /* nqueue */
+ case 'q':
+ l2fwd_rx_queue_per_lcore = l2fwd_parse_nqueue(optarg);
+ if (l2fwd_rx_queue_per_lcore == 0) {
+ printf("invalid queue number\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* timer period */
+ case 'T':
+ timer_period = l2fwd_parse_timer_period(optarg) * 1000 * TIMER_MILLISECOND;
+ if (timer_period < 0) {
+ printf("invalid timer period\n");
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ break;
+
+ /* use floating process */
+ case 'f':
+ float_proc = 1;
+ break;
+
+ /* long options */
+ case 0:
+ l2fwd_usage(prgname);
+ return -1;
+
+ default:
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ if (!has_pmask) {
+ l2fwd_usage(prgname);
+ return -1;
+ }
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+/* Check the link status of all ports in up to 9s, and print them finally */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct lcore_queue_conf *qconf;
+ struct rte_eth_dev_info dev_info;
+ int ret;
+ uint8_t nb_ports;
+ uint8_t nb_ports_available;
+ uint8_t portid, last_port;
+ unsigned rx_lcore_id;
+ unsigned nb_ports_in_mask = 0;
+ unsigned i;
+ int flags = 0;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, timer_tsc;
+
+ /* Save the CPU affinity first; it is restored later when the floating process option is used */
+ if (get_cpu_affinity() != 0)
+ rte_exit(EXIT_FAILURE, "get_cpu_affinity error\n");
+
+ /* Also try to set the CPU affinity now, to detect whether it would fail in a child process */
+ if(clear_cpu_affinity() != 0)
+ rte_exit(EXIT_FAILURE, "clear_cpu_affinity error\n");
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL arguments\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse application arguments (after the EAL ones) */
+ ret = l2fwd_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L2FWD arguments\n");
+
+ /* flib init */
+ if (flib_init() != 0)
+ rte_exit(EXIT_FAILURE, "flib init error");
+
+ /**
+ * Allocate the structures that slave lcores will modify. Structures that
+ * slaves only read need not be shared via malloc; global or static variables
+ * are fine, since a slave inherits everything the master initialized.
+ **/
+ if (l2fwd_malloc_shared_struct() != 0)
+ rte_exit(EXIT_FAILURE, "malloc mem failed\n");
+
+ /* Initialize lcore_resource structures */
+ memset(lcore_resource, 0, sizeof(lcore_resource));
+ for (i = 0; i < RTE_MAX_LCORE; i++)
+ lcore_resource[i].lcore_id = i;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /* create the mbuf pool */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+ char buf_name[RTE_MEMPOOL_NAMESIZE];
+ flags = MEMPOOL_F_SP_PUT | MEMPOOL_F_SC_GET;
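+ /* Each per-port pool is only used by the single lcore that polls that
+ * port (mbufs are allocated on RX and freed when they complete TX on
+ * the paired port, both on that same lcore), so single-producer put /
+ * single-consumer get flags are safe here */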
+ snprintf(buf_name, RTE_MEMPOOL_NAMESIZE, MBUF_NAME, portid);
+ l2fwd_pktmbuf_pool[portid] =
+ rte_mempool_create(buf_name, NB_MBUF,
+ MBUF_SIZE, 32,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), flags);
+ if (l2fwd_pktmbuf_pool[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool\n");
+
+ printf("Create mbuf %s\n", buf_name);
+ }
+
+ /* reset l2fwd_dst_ports */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++)
+ l2fwd_dst_ports[portid] = 0;
+ last_port = 0;
+
+ /*
+ * Each logical core is assigned a dedicated TX queue on each port.
+ */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ if (nb_ports_in_mask % 2) {
+ l2fwd_dst_ports[portid] = last_port;
+ l2fwd_dst_ports[last_port] = portid;
+ }
+ else
+ last_port = portid;
+
+ nb_ports_in_mask++;
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ }
+ if (nb_ports_in_mask % 2) {
+ printf("Notice: odd number of ports in portmask.\n");
+ l2fwd_dst_ports[last_port] = last_port;
+ }
+
+ rx_lcore_id = 0;
+ qconf = NULL;
+
+ /* Initialize the port/queue configuration of each logical core */
+ for (portid = 0; portid < nb_ports; portid++) {
+ struct lcore_resource_struct *res;
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* get the lcore_id for this port */
+ /* skip master lcore */
+ while (rte_lcore_is_enabled(rx_lcore_id) == 0 ||
+ rte_get_master_lcore() == rx_lcore_id ||
+ lcore_queue_conf[rx_lcore_id].n_rx_port ==
+ l2fwd_rx_queue_per_lcore) {
+
+ rx_lcore_id++;
+ if (rx_lcore_id >= RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+ }
+
+ if (qconf != &lcore_queue_conf[rx_lcore_id])
+ /* Assigned a new logical core in the loop above. */
+ qconf = &lcore_queue_conf[rx_lcore_id];
+
+ qconf->rx_port_list[qconf->n_rx_port] = portid;
+ qconf->n_rx_port++;
+
+ /* Save the port resource info into the lcore_resource structures */
+ res = &lcore_resource[rx_lcore_id];
+ res->enabled = 1;
+ res->port[res->port_num++] = portid;
+
+ printf("Lcore %u: RX port %u\n", rx_lcore_id, (unsigned) portid);
+ }
+
+ nb_ports_available = nb_ports;
+
+ /* Initialise each port */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0) {
+ printf("Skipping disabled port %u\n", (unsigned) portid);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... ", (unsigned) portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ rte_eth_macaddr_get(portid,&l2fwd_ports_eth_addr[portid]);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, nb_rxd,
+ rte_eth_dev_socket_id(portid),
+ NULL,
+ l2fwd_pktmbuf_pool[portid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* init one TX queue on each port */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0, nb_txd,
+ rte_eth_dev_socket_id(portid),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ /* Initialize TX buffers */
+ tx_buffer[portid] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKT_BURST), 0,
+ rte_eth_dev_socket_id(portid));
+ if (tx_buffer[portid] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) portid);
+
+ rte_eth_tx_buffer_init(tx_buffer[portid], MAX_PKT_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[portid],
+ rte_eth_tx_buffer_count_callback,
+ &port_statistics[portid].dropped);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) portid);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start:err=%d, port=%u\n",
+ ret, (unsigned) portid);
+
+ printf("done: \n");
+
+ rte_eth_promiscuous_enable(portid);
+
+ printf("Port %u, MAC address: %02X:%02X:%02X:%02X:%02X:%02X\n\n",
+ (unsigned) portid,
+ l2fwd_ports_eth_addr[portid].addr_bytes[0],
+ l2fwd_ports_eth_addr[portid].addr_bytes[1],
+ l2fwd_ports_eth_addr[portid].addr_bytes[2],
+ l2fwd_ports_eth_addr[portid].addr_bytes[3],
+ l2fwd_ports_eth_addr[portid].addr_bytes[4],
+ l2fwd_ports_eth_addr[portid].addr_bytes[5]);
+
+ /* port stats need no explicit init: port_statistics was allocated with rte_zmalloc() */
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ check_all_ports_link_status(nb_ports, l2fwd_enabled_port_mask);
+
+ /* Record pair lcore */
+ /**
+ * The l2fwd example pairs neighbouring ports: port 0 receives and forwards to
+ * port 1, and vice versa, so the two ports depend on each other. If one port
+ * stops working (its slave is killed, for example), both ports need to be
+ * stopped and started again, and the other port has to wait until that
+ * stop/start procedure completes. Therefore, record the pair relationship
+ * for the lcores working on these ports.
+ **/
+ for (portid = 0; portid < nb_ports; portid++) {
+ uint32_t pair_port;
+ unsigned lcore = 0, pair_lcore = 0;
+ unsigned j, find_lcore, find_pair_lcore;
+ /* skip ports that are not enabled */
+ if ((l2fwd_enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* Find pair ports' lcores */
+ find_lcore = find_pair_lcore = 0;
+ pair_port = l2fwd_dst_ports[portid];
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (!rte_lcore_is_enabled(i))
+ continue;
+ for (j = 0; j < lcore_queue_conf[i].n_rx_port;j++) {
+ if (lcore_queue_conf[i].rx_port_list[j] == portid) {
+ lcore = i;
+ find_lcore = 1;
+ break;
+ }
+ if (lcore_queue_conf[i].rx_port_list[j] == pair_port) {
+ pair_lcore = i;
+ find_pair_lcore = 1;
+ break;
+ }
+ }
+ if (find_lcore && find_pair_lcore)
+ break;
+ }
+ if (!find_lcore || !find_pair_lcore)
+ rte_exit(EXIT_FAILURE, "Not find port=%d pair\n", portid);
+
+ printf("lcore %u and %u paired\n", lcore, pair_lcore);
+ lcore_resource[lcore].pair_id = pair_lcore;
+ lcore_resource[pair_lcore].pair_id = lcore;
+ }
+
+ /* Create message buffer for all master and slave */
+ message_pool = rte_mempool_create("ms_msg_pool",
+ NB_CORE_MSGBUF * RTE_MAX_LCORE,
+ sizeof(enum l2fwd_cmd), NB_CORE_MSGBUF / 2,
+ 0,
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+
+ if (message_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Create msg mempool failed\n");
+
+ /* Create a ring for each master/slave pair and register a callback for when the slave leaves */
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ /**
+ * Only create the ring and register the slave_exit callback if that core is
+ * involved in packet forwarding
+ **/
+ if (lcore_resource[i].enabled) {
+ /* Create ring for master and slave communication */
+ ret = create_ms_ring(i);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "Create ring for lcore=%u failed",
+ i);
+
+ if (flib_register_slave_exit_notify(i,
+ slave_exit_cb) != 0)
+ rte_exit(EXIT_FAILURE,
+ "Register master_trace_slave_exit failed");
+ }
+ }
+
+ /* launch per-lcore init on every lcore except master */
+ flib_mp_remote_launch(l2fwd_launch_one_lcore, NULL, SKIP_MASTER);
+
+ /* Print statistics every 10 seconds and monitor the slave lcores */
+ prev_tsc = cur_tsc = rte_rdtsc();
+ timer_tsc = 0;
+ while (1) {
+ sleep(1);
+ cur_tsc = rte_rdtsc();
+ diff_tsc = cur_tsc - prev_tsc;
+ /* if timer is enabled */
+ if (timer_period > 0) {
+
+ /* advance the timer */
+ timer_tsc += diff_tsc;
+
+ /* if timer has reached its timeout */
+ if (unlikely(timer_tsc >= (uint64_t) timer_period)) {
+
+ print_stats();
+ /* reset the timer */
+ timer_tsc = 0;
+ }
+ }
+
+ prev_tsc = cur_tsc;
+
+ /* Check any slave need restart or recreate */
+ rte_spinlock_lock(&res_lock);
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ struct lcore_resource_struct *res = &lcore_resource[i];
+ struct lcore_resource_struct *pair = &lcore_resource[res->pair_id];
+
+ /* If a slave is found to have exited, try to reset the pair */
+ if (res->enabled && res->flags && pair->enabled) {
+ if (!pair->flags) {
+ master_sendcmd_with_ack(pair->lcore_id, CMD_STOP);
+ rte_spinlock_unlock(&res_lock);
+ sleep(1);
+ rte_spinlock_lock(&res_lock);
+ if (pair->flags)
+ continue;
+ }
+ if (reset_pair(res->lcore_id, pair->lcore_id) != 0)
+ rte_exit(EXIT_FAILURE, "failed to reset slave");
+ res->flags = 0;
+ pair->flags = 0;
+ }
+ }
+ rte_spinlock_unlock(&res_lock);
+ }
+
+}
diff --git a/examples/multi_process/simple_mp/Makefile b/examples/multi_process/simple_mp/Makefile
new file mode 100644
index 00000000..31ec0c80
--- /dev/null
+++ b/examples/multi_process/simple_mp/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = simple_mp
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c mp_commands.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/simple_mp/main.c b/examples/multi_process/simple_mp/main.c
new file mode 100644
index 00000000..2843d94e
--- /dev/null
+++ b/examples/multi_process/simple_mp/main.c
@@ -0,0 +1,155 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This sample is a simple multi-process application which demonstrates
+ * the sharing of queues and memory pools between processes, and the use
+ * of those queues/pools for communication between the processes.
+ *
+ * The application is designed to run with two processes, a primary and a
+ * secondary, and each accepts commands on the command line, the most
+ * important of which is "send", which just sends a string to the other
+ * process.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <unistd.h>
+#include <termios.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+#include "mp_commands.h"
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+static const char *_MSG_POOL = "MSG_POOL";
+static const char *_SEC_2_PRI = "SEC_2_PRI";
+static const char *_PRI_2_SEC = "PRI_2_SEC";
+const unsigned string_size = 64;
+
+struct rte_ring *send_ring, *recv_ring;
+struct rte_mempool *message_pool;
+volatile int quit = 0;
+
+static int
+lcore_recv(__attribute__((unused)) void *arg)
+{
+ unsigned lcore_id = rte_lcore_id();
+
+ printf("Starting core %u\n", lcore_id);
+ while (!quit){
+ void *msg;
+ if (rte_ring_dequeue(recv_ring, &msg) < 0){
+ usleep(5);
+ continue;
+ }
+ printf("core %u: Received '%s'\n", lcore_id, (char *)msg);
+ rte_mempool_put(message_pool, msg);
+ }
+
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ const unsigned flags = 0;
+ const unsigned ring_size = 64;
+ const unsigned pool_size = 1024;
+ const unsigned pool_cache = 32;
+ const unsigned priv_data_sz = 0;
+
+ int ret;
+ unsigned lcore_id;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot init EAL\n");
+
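+ /* The primary process creates the rings and the message pool; secondary
+ * processes simply look up the objects the primary created */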
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY){
+ send_ring = rte_ring_create(_PRI_2_SEC, ring_size, rte_socket_id(), flags);
+ recv_ring = rte_ring_create(_SEC_2_PRI, ring_size, rte_socket_id(), flags);
+ message_pool = rte_mempool_create(_MSG_POOL, pool_size,
+ string_size, pool_cache, priv_data_sz,
+ NULL, NULL, NULL, NULL,
+ rte_socket_id(), flags);
+ } else {
+ recv_ring = rte_ring_lookup(_PRI_2_SEC);
+ send_ring = rte_ring_lookup(_SEC_2_PRI);
+ message_pool = rte_mempool_lookup(_MSG_POOL);
+ }
+ if (send_ring == NULL)
+ rte_exit(EXIT_FAILURE, "Problem getting sending ring\n");
+ if (recv_ring == NULL)
+ rte_exit(EXIT_FAILURE, "Problem getting receiving ring\n");
+ if (message_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Problem getting message pool\n");
+
+ RTE_LOG(INFO, APP, "Finished Process Init.\n");
+
+ /* call lcore_recv() on every slave lcore */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(lcore_recv, NULL, lcore_id);
+ }
+
+ /* call cmd prompt on master lcore */
+ struct cmdline *cl = cmdline_stdin_new(simple_mp_ctx, "\nsimple_mp > ");
+ if (cl == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create cmdline instance\n");
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ rte_eal_mp_wait_lcore();
+ return 0;
+}
diff --git a/examples/multi_process/simple_mp/mp_commands.c b/examples/multi_process/simple_mp/mp_commands.c
new file mode 100644
index 00000000..8da244bb
--- /dev/null
+++ b/examples/multi_process/simple_mp/mp_commands.c
@@ -0,0 +1,166 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <termios.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_mempool.h>
+#include <rte_string_fns.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+#include "mp_commands.h"
+
+/**********************************************************/
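+/* Each command below is described by a result struct holding the parsed
+ * tokens, TOKEN_*_INITIALIZER entries for each token, and a
+ * cmdline_parse_inst_t tying the callback to the token list */
+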
+
+struct cmd_send_result {
+ cmdline_fixed_string_t action;
+ cmdline_fixed_string_t message;
+};
+
+static void cmd_send_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ void *msg = NULL;
+ struct cmd_send_result *res = parsed_result;
+
+ if (rte_mempool_get(message_pool, &msg) < 0)
+ rte_panic("Failed to get message buffer\n");
+ snprintf((char *)msg, string_size, "%s", res->message);
+ if (rte_ring_enqueue(send_ring, msg) < 0) {
+ printf("Failed to send message - message discarded\n");
+ rte_mempool_put(message_pool, msg);
+ }
+}
+
+cmdline_parse_token_string_t cmd_send_action =
+ TOKEN_STRING_INITIALIZER(struct cmd_send_result, action, "send");
+cmdline_parse_token_string_t cmd_send_message =
+ TOKEN_STRING_INITIALIZER(struct cmd_send_result, message, NULL);
+
+cmdline_parse_inst_t cmd_send = {
+ .f = cmd_send_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "send a string to another process",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_send_action,
+ (void *)&cmd_send_message,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ quit = 1;
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "close the application",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/**********************************************************/
+
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl, "Simple demo example of multi-process in RTE\n\n"
+ "This is a readline-like interface that can be used to\n"
+ "send commands to the simple app. Commands supported are:\n\n"
+ "- send [string]\n" "- help\n" "- quit\n\n");
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "show help",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+/****** CONTEXT (list of instruction) */
+cmdline_parse_ctx_t simple_mp_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_send,
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_help,
+ NULL,
+};
diff --git a/examples/multi_process/simple_mp/mp_commands.h b/examples/multi_process/simple_mp/mp_commands.h
new file mode 100644
index 00000000..7e9a4ab2
--- /dev/null
+++ b/examples/multi_process/simple_mp/mp_commands.h
@@ -0,0 +1,44 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _SIMPLE_MP_COMMANDS_H_
+#define _SIMPLE_MP_COMMANDS_H_
+
+extern const unsigned string_size;
+extern struct rte_ring *send_ring;
+extern struct rte_mempool *message_pool;
+extern volatile int quit;
+
+extern cmdline_parse_ctx_t simple_mp_ctx[];
+
+#endif /* _SIMPLE_MP_COMMANDS_H_ */
diff --git a/examples/multi_process/symmetric_mp/Makefile b/examples/multi_process/symmetric_mp/Makefile
new file mode 100644
index 00000000..c789f3c9
--- /dev/null
+++ b/examples/multi_process/symmetric_mp/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = symmetric_mp
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/multi_process/symmetric_mp/main.c b/examples/multi_process/symmetric_mp/main.c
new file mode 100644
index 00000000..6bbff076
--- /dev/null
+++ b/examples/multi_process/symmetric_mp/main.c
@@ -0,0 +1,472 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Sample application demonstrating how to do packet I/O in a multi-process
+ * environment. The same code can be run as a primary process and as a
+ * secondary process, just with a different proc-id parameter in each case
+ * (apart from the EAL flag to indicate a secondary process).
+ *
+ * Each process will read from the same ports, given by the port-mask
+ * parameter, which should be the same in each case, just using a different
+ * queue per port as determined by the proc-id parameter.
+ */
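+
+/*
+ * Illustrative invocations (a sketch only; the EAL core masks and memory
+ * channel count are assumptions and must be adapted to the target system):
+ *
+ *   primary:   ./symmetric_mp -c 1 -n 4 --proc-type=primary   -- -p 3 --num-procs=2 --proc-id=0
+ *   secondary: ./symmetric_mp -c 2 -n 4 --proc-type=secondary -- -p 3 --num-procs=2 --proc-id=1
+ */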
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <sys/queue.h>
+#include <getopt.h>
+#include <signal.h>
+#include <inttypes.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_ring.h>
+#include <rte_debug.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_memcpy.h>
+#include <rte_mbuf.h>
+#include <rte_string_fns.h>
+#include <rte_cycles.h>
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+#define NB_MBUFS 64*1024 /* use 64k mbufs */
+#define MBUF_CACHE_SIZE 256
+#define PKT_BURST 32
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define PARAM_PROC_ID "proc-id"
+#define PARAM_NUM_PROCS "num-procs"
+
+/* for each lcore, record the elements of the ports array to use */
+struct lcore_ports{
+ unsigned start_port;
+ unsigned num_ports;
+};
+
+/* structure to record the rx and tx packet counts. Two fit per cache line,
+ * as ports are used in pairs */
+struct port_stats{
+ unsigned rx;
+ unsigned tx;
+ unsigned drop;
+} __attribute__((aligned(RTE_CACHE_LINE_SIZE / 2)));
+
+static int proc_id = -1;
+static unsigned num_procs = 0;
+
+static uint8_t ports[RTE_MAX_ETHPORTS];
+static unsigned num_ports = 0;
+
+static struct lcore_ports lcore_ports[RTE_MAX_LCORE];
+static struct port_stats pstats[RTE_MAX_ETHPORTS];
+
+/* prints the usage statement and quits with an error message */
+static void
+smp_usage(const char *prgname, const char *errmsg)
+{
+ printf("\nError: %s\n",errmsg);
+ printf("\n%s [EAL options] -- -p <port mask> "
+ "--"PARAM_NUM_PROCS" <n>"
+ " --"PARAM_PROC_ID" <id>\n"
+ "-p : a hex bitmask indicating what ports are to be used\n"
+ "--num-procs: the number of processes which will be used\n"
+ "--proc-id : the id of the current process (id < num-procs)\n"
+ "\n",
+ prgname);
+ exit(1);
+}
+
+
+/* signal handler configured for SIGTERM and SIGINT to print stats on exit */
+static void
+print_stats(int signum)
+{
+ unsigned i;
+ printf("\nExiting on signal %d\n\n", signum);
+ for (i = 0; i < num_ports; i++){
+ const uint8_t p_num = ports[i];
+ printf("Port %u: RX - %u, TX - %u, Drop - %u\n", (unsigned)p_num,
+ pstats[p_num].rx, pstats[p_num].tx, pstats[p_num].drop);
+ }
+ exit(0);
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+smp_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ unsigned i, port_mask = 0;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {PARAM_NUM_PROCS, 1, 0, 0},
+ {PARAM_PROC_ID, 1, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:", \
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ case 'p':
+ port_mask = strtoull(optarg, NULL, 16);
+ break;
+ /* long options */
+ case 0:
+ if (strncmp(lgopts[option_index].name, PARAM_NUM_PROCS, 8) == 0)
+ num_procs = atoi(optarg);
+ else if (strncmp(lgopts[option_index].name, PARAM_PROC_ID, 7) == 0)
+ proc_id = atoi(optarg);
+ break;
+
+ default:
+ smp_usage(prgname, "Cannot parse all command-line arguments\n");
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ if (proc_id < 0)
+ smp_usage(prgname, "Invalid or missing proc-id parameter\n");
+ if (rte_eal_process_type() == RTE_PROC_PRIMARY && num_procs == 0)
+ smp_usage(prgname, "Invalid or missing num-procs parameter\n");
+ if (port_mask == 0)
+ smp_usage(prgname, "Invalid or missing port mask\n");
+
+ /* get the port numbers from the port mask */
+ for(i = 0; i < rte_eth_dev_count(); i++)
+ if(port_mask & (1 << i))
+ ports[num_ports++] = (uint8_t)i;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+
+ return ret;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as a parameter
+ */
+static inline int
+smp_port_init(uint8_t port, struct rte_mempool *mbuf_pool, uint16_t num_queues)
+{
+ struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ }
+ };
+ const uint16_t rx_rings = num_queues, tx_rings = num_queues;
+ struct rte_eth_dev_info info;
+ int retval;
+ uint16_t q;
+
+ if (rte_eal_process_type() == RTE_PROC_SECONDARY)
+ return 0;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ printf("# Initialising port %u... ", (unsigned)port);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(port, &info);
+ info.default_rxconf.rx_drop_en = 1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval < 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q ++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ &info.default_rxconf,
+ mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q ++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port),
+ NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ rte_eth_promiscuous_enable(port);
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ return 0;
+}
+
+/* Goes through each of the lcores and calculates what ports should
+ * be used by that core. Fills in the global lcore_ports[] array.
+ */
+static void
+assign_ports_to_cores(void)
+{
+
+ const unsigned lcores = rte_eal_get_configuration()->lcore_count;
+ const unsigned port_pairs = num_ports / 2;
+ const unsigned pairs_per_lcore = port_pairs / lcores;
+ unsigned extra_pairs = port_pairs % lcores;
+ unsigned ports_assigned = 0;
+ unsigned i;
+
+ RTE_LCORE_FOREACH(i) {
+ lcore_ports[i].start_port = ports_assigned;
+ lcore_ports[i].num_ports = pairs_per_lcore * 2;
+ if (extra_pairs > 0) {
+ lcore_ports[i].num_ports += 2;
+ extra_pairs--;
+ }
+ ports_assigned += lcore_ports[i].num_ports;
+ }
+}
+
+/* Main function used by the processing threads.
+ * Prints out some configuration details for the thread and then begins
+ * performing packet RX and TX.
+ */
+static int
+lcore_main(void *arg __rte_unused)
+{
+ const unsigned id = rte_lcore_id();
+ const unsigned start_port = lcore_ports[id].start_port;
+ const unsigned end_port = start_port + lcore_ports[id].num_ports;
+ const uint16_t q_id = (uint16_t)proc_id;
+ unsigned p, i;
+ char msgbuf[256];
+ int msgbufpos = 0;
+
+ if (start_port == end_port){
+ printf("Lcore %u has nothing to do\n", id);
+ return 0;
+ }
+
+ /* build up message in msgbuf before printing to decrease likelihood
+ * of multi-core message interleaving.
+ */
+ msgbufpos += snprintf(msgbuf, sizeof(msgbuf) - msgbufpos,
+ "Lcore %u using ports ", id);
+ for (p = start_port; p < end_port; p++){
+ msgbufpos += snprintf(msgbuf + msgbufpos, sizeof(msgbuf) - msgbufpos,
+ "%u ", (unsigned)ports[p]);
+ }
+ printf("%s\n", msgbuf);
+ printf("lcore %u using queue %u of each port\n", id, (unsigned)q_id);
+
+ /* handle packet I/O from the ports, reading and writing to the
+ * queue number corresponding to our process number (not lcore id)
+ */
+
+ for (;;) {
+ struct rte_mbuf *buf[PKT_BURST];
+
+ for (p = start_port; p < end_port; p++) {
+ const uint8_t src = ports[p];
+ const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
+ const uint16_t rx_c = rte_eth_rx_burst(src, q_id, buf, PKT_BURST);
+ if (rx_c == 0)
+ continue;
+ pstats[src].rx += rx_c;
+
+ const uint16_t tx_c = rte_eth_tx_burst(dst, q_id, buf, rx_c);
+ pstats[dst].tx += tx_c;
+ if (tx_c != rx_c) {
+ pstats[dst].drop += (rx_c - tx_c);
+ for (i = tx_c; i < rx_c; i++)
+ rte_pktmbuf_free(buf[i]);
+ }
+ }
+ }
+}
+
+/* Check the link status of all ports for up to 9s, then print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+/* Main function.
+ * Performs initialisation and then calls the lcore_main on each core
+ * to do the packet-processing work.
+ */
+int
+main(int argc, char **argv)
+{
+ static const char *_SMP_MBUF_POOL = "SMP_MBUF_POOL";
+ int ret;
+ unsigned i;
+ enum rte_proc_type_t proc_type;
+ struct rte_mempool *mp;
+
+ /* set up signal handlers to print stats on exit */
+ signal(SIGINT, print_stats);
+ signal(SIGTERM, print_stats);
+
+ /* initialise the EAL for all */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot init EAL\n");
+ argc -= ret;
+ argv += ret;
+
+ /* determine the NIC devices available */
+ if (rte_eth_dev_count() == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet ports - bye\n");
+
+ /* parse application arguments (those after the EAL ones) */
+ smp_parse_args(argc, argv);
+
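+ /* The primary process creates the shared mbuf pool; secondary processes
+ * attach to the same pool by looking it up by name. */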
+ proc_type = rte_eal_process_type();
+ mp = (proc_type == RTE_PROC_SECONDARY) ?
+ rte_mempool_lookup(_SMP_MBUF_POOL) :
+ rte_pktmbuf_pool_create(_SMP_MBUF_POOL, NB_MBUFS,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_socket_id());
+ if (mp == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot get memory pool for buffers\n");
+
+ if (num_ports & 1)
+ rte_exit(EXIT_FAILURE, "Application must use an even number of ports\n");
+ for(i = 0; i < num_ports; i++){
+ if(proc_type == RTE_PROC_PRIMARY)
+ if (smp_port_init(ports[i], mp, (uint16_t)num_procs) < 0)
+ rte_exit(EXIT_FAILURE, "Error initialising ports\n");
+ }
+
+ if (proc_type == RTE_PROC_PRIMARY)
+ check_all_ports_link_status((uint8_t)num_ports, (~0x0));
+
+ assign_ports_to_cores();
+
+ RTE_LOG(INFO, APP, "Finished Process Init.\n");
+
+ rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
+
+ return 0;
+}
diff --git a/examples/netmap_compat/Makefile b/examples/netmap_compat/Makefile
new file mode 100644
index 00000000..52d80869
--- /dev/null
+++ b/examples/netmap_compat/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+unexport RTE_SRCDIR RTE_OUTPUT RTE_EXTMK
+
+DIRS-y += bridge
+
+.PHONY: all clean $(DIRS-y)
+
+all: $(DIRS-y)
+clean: $(DIRS-y)
+
+$(DIRS-y):
+ $(MAKE) -C $@ $(MAKECMDGOALS) O=$(RTE_OUTPUT)
diff --git a/examples/netmap_compat/bridge/Makefile b/examples/netmap_compat/bridge/Makefile
new file mode 100644
index 00000000..50d96e81
--- /dev/null
+++ b/examples/netmap_compat/bridge/Makefile
@@ -0,0 +1,62 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define the RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = bridge
+
+# for compat_netmap.c
+VPATH := $(SRCDIR)/../lib
+
+# all sources are stored in SRCS-y
+SRCS-y := bridge.c
+SRCS-y += compat_netmap.c
+
+CFLAGS += -O3 -I$(SRCDIR)/../lib -I$(SRCDIR)/../netmap
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/netmap_compat/bridge/bridge.c b/examples/netmap_compat/bridge/bridge.c
new file mode 100644
index 00000000..53f5fdb6
--- /dev/null
+++ b/examples/netmap_compat/bridge/bridge.c
@@ -0,0 +1,377 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <getopt.h>
+#include <inttypes.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_string_fns.h>
+#include "compat_netmap.h"
+
+
+#define BUF_SIZE RTE_MBUF_DEFAULT_DATAROOM
+#define MBUF_DATA_SIZE (BUF_SIZE + RTE_PKTMBUF_HEADROOM)
+
+#define MBUF_PER_POOL 8192
+
+struct rte_eth_conf eth_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0,
+ .hw_ip_checksum = 0,
+ .hw_vlan_filter = 0,
+ .jumbo_frame = 0,
+ .hw_strip_crc = 0,
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+#define MAX_QUEUE_NUM 1
+#define RX_QUEUE_NUM 1
+#define TX_QUEUE_NUM 1
+
+#define MAX_DESC_NUM 0x400
+#define RX_DESC_NUM 0x100
+#define TX_DESC_NUM 0x200
+
+#define RX_SYNC_NUM 0x20
+#define TX_SYNC_NUM 0x20
+
+struct rte_netmap_port_conf port_conf = {
+ .eth_conf = &eth_conf,
+ .socket_id = SOCKET_ID_ANY,
+ .nr_tx_rings = TX_QUEUE_NUM,
+ .nr_rx_rings = RX_QUEUE_NUM,
+ .nr_tx_slots = TX_DESC_NUM,
+ .nr_rx_slots = RX_DESC_NUM,
+ .tx_burst = TX_SYNC_NUM,
+ .rx_burst = RX_SYNC_NUM,
+};
+
+struct rte_netmap_conf netmap_conf = {
+ .socket_id = SOCKET_ID_ANY,
+ .max_bufsz = BUF_SIZE,
+ .max_rings = MAX_QUEUE_NUM,
+ .max_slots = MAX_DESC_NUM,
+};
+
+static int stop = 0;
+
+#define MAX_PORT_NUM 2
+
+struct netmap_port {
+ int fd;
+ struct netmap_if *nmif;
+ struct netmap_ring *rx_ring;
+ struct netmap_ring *tx_ring;
+ const char *str;
+ uint8_t id;
+};
+
+static struct {
+ uint32_t num;
+ struct netmap_port p[MAX_PORT_NUM];
+ void *mem;
+} ports;
+
+static void
+usage(const char *prgname)
+{
+ fprintf(stderr, "Usage: %s [EAL args] -- [OPTION]...\n"
+ "-h, --help \t Show this help message and exit\n"
+ "-i INTERFACE_A \t Interface (DPDK port number) to use\n"
+ "[ -i INTERFACE_B \t Interface (DPDK port number) to use ]\n",
+ prgname);
+}
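+
+/*
+ * Illustrative invocation (a sketch only; the EAL core mask and memory
+ * channel count are assumptions and must be adapted to the target system):
+ *
+ *   ./bridge -c 1 -n 4 -- -i 0 -i 1
+ */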
+
+static uint8_t
+parse_portid(const char *portid_str)
+{
+ char *end;
+ unsigned id;
+
+ id = strtoul(portid_str, &end, 10);
+
+ if (end == portid_str || *end != '\0' || id >= RTE_MAX_ETHPORTS)
+ rte_exit(EXIT_FAILURE, "Invalid port number\n");
+
+ return (uint8_t) id;
+}
+
+static int
+parse_args(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hi:")) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ rte_exit(EXIT_SUCCESS, "exiting...");
+ break;
+ case 'i':
+ if (ports.num >= RTE_DIM(ports.p)) {
+ usage(argv[0]);
+ rte_exit(EXIT_FAILURE, "configs with %u "
+ "ports are not supported\n",
+ ports.num + 1);
+
+ }
+
+ ports.p[ports.num].str = optarg;
+ ports.p[ports.num].id = parse_portid(optarg);
+ ports.num++;
+ break;
+ default:
+ usage(argv[0]);
+ rte_exit(EXIT_FAILURE, "invalid option: %c\n", opt);
+ }
+ }
+
+ return 0;
+}
+
+static void sigint_handler(__rte_unused int sig)
+{
+ stop = 1;
+ signal(SIGINT, SIG_DFL);
+}
+
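+/*
+ * Forward "n" packets from an RX ring to a TX ring by swapping the buffer
+ * indices of the corresponding slots, so packet payloads are never copied.
+ * NS_BUF_CHANGED is set on both slots so the emulated kernel side knows
+ * that the buffer attached to each slot has changed.
+ */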
+static void move(int n, struct netmap_ring *rx, struct netmap_ring *tx)
+{
+ uint32_t tmp;
+
+ while (n-- > 0) {
+ tmp = tx->slot[tx->cur].buf_idx;
+
+ tx->slot[tx->cur].buf_idx = rx->slot[rx->cur].buf_idx;
+ tx->slot[tx->cur].len = rx->slot[rx->cur].len;
+ tx->slot[tx->cur].flags |= NS_BUF_CHANGED;
+ tx->cur = NETMAP_RING_NEXT(tx, tx->cur);
+ tx->avail--;
+
+ rx->slot[rx->cur].buf_idx = tmp;
+ rx->slot[rx->cur].flags |= NS_BUF_CHANGED;
+ rx->cur = NETMAP_RING_NEXT(rx, rx->cur);
+ rx->avail--;
+ }
+}
+
+static int
+netmap_port_open(uint32_t idx)
+{
+ int err;
+ struct netmap_port *port;
+ struct nmreq req;
+
+ port = ports.p + idx;
+
+ port->fd = rte_netmap_open("/dev/netmap", O_RDWR);
+
+ snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str);
+ req.nr_version = NETMAP_API;
+ req.nr_ringid = 0;
+
+ err = rte_netmap_ioctl(port->fd, NIOCGINFO, &req);
+ if (err) {
+ printf("[E] NIOCGINFO ioctl failed (error %d)\n", err);
+ return err;
+ }
+
+ snprintf(req.nr_name, sizeof(req.nr_name), "%s", port->str);
+ req.nr_version = NETMAP_API;
+ req.nr_ringid = 0;
+
+ err = rte_netmap_ioctl(port->fd, NIOCREGIF, &req);
+ if (err) {
+ printf("[E] NIOCREGIF ioctl failed (error %d)\n", err);
+ return err;
+ }
+
+ /* mmap only once. */
+ if (ports.mem == NULL)
+ ports.mem = rte_netmap_mmap(NULL, req.nr_memsize,
+ PROT_WRITE | PROT_READ, MAP_PRIVATE, port->fd, 0);
+
+ if (ports.mem == MAP_FAILED) {
+ printf("[E] NETMAP mmap failed for fd: %d)\n", port->fd);
+ return -ENOMEM;
+ }
+
+ port->nmif = NETMAP_IF(ports.mem, req.nr_offset);
+
+ port->tx_ring = NETMAP_TXRING(port->nmif, 0);
+ port->rx_ring = NETMAP_RXRING(port->nmif, 0);
+
+ return 0;
+}
+
+
+int main(int argc, char *argv[])
+{
+ int err, ret;
+ uint32_t i, pmsk;
+ struct nmreq req;
+ struct pollfd pollfd[MAX_PORT_NUM];
+ struct rte_mempool *pool;
+ struct netmap_ring *rx_ring, *tx_ring;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n");
+
+ argc -= ret;
+ argv += ret;
+
+ parse_args(argc, argv);
+
+ if (ports.num == 0)
+ rte_exit(EXIT_FAILURE, "no ports specified\n");
+
+ if (rte_eth_dev_count() < 1)
+ rte_exit(EXIT_FAILURE, "Not enough ethernet ports available\n");
+
+ pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0,
+ MBUF_DATA_SIZE, rte_socket_id());
+ if (pool == NULL)
+ rte_exit(EXIT_FAILURE, "Couldn't create mempool\n");
+
+ netmap_conf.socket_id = rte_socket_id();
+ err = rte_netmap_init(&netmap_conf);
+
+ if (err < 0)
+ rte_exit(EXIT_FAILURE,
+ "Couldn't initialize librte_compat_netmap\n");
+ else
+ printf("librte_compat_netmap initialized\n");
+
+ port_conf.pool = pool;
+ port_conf.socket_id = rte_socket_id();
+
+ for (i = 0; i != ports.num; i++) {
+
+ err = rte_netmap_init_port(ports.p[i].id, &port_conf);
+ if (err < 0)
+ rte_exit(EXIT_FAILURE, "Couldn't setup port %hhu\n",
+ ports.p[i].id);
+
+ rte_eth_promiscuous_enable(ports.p[i].id);
+ }
+
+ for (i = 0; i != ports.num; i++) {
+
+ err = netmap_port_open(i);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Couldn't set port %hhu "
+ "under NETMAP control\n",
+ ports.p[i].id);
+ }
+ else
+ printf("Port %hhu now in Netmap mode\n", ports.p[i].id);
+ }
+
+ memset(pollfd, 0, sizeof(pollfd));
+
+ for (i = 0; i != ports.num; i++) {
+ pollfd[i].fd = ports.p[i].fd;
+ pollfd[i].events = POLLIN | POLLOUT;
+ }
+
+ signal(SIGINT, sigint_handler);
+
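+ /* index of the peer port: 1 when two ports are bridged, 0 when a single
+ * port is looped back onto itself */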
+ pmsk = ports.num - 1;
+
+ printf("Bridge up and running!\n");
+
+ while (!stop) {
+ uint32_t n_pkts;
+
+ pollfd[0].revents = 0;
+ pollfd[1].revents = 0;
+
+ ret = rte_netmap_poll(pollfd, ports.num, 0);
+ if (ret < 0) {
+ stop = 1;
+ printf("[E] poll returned with error %d\n", ret);
+ }
+
+ if (((pollfd[0].revents | pollfd[1].revents) & POLLERR) != 0) {
+ printf("POLLERR!\n");
+ }
+
+ if ((pollfd[0].revents & POLLIN) != 0 &&
+ (pollfd[pmsk].revents & POLLOUT) != 0) {
+
+ rx_ring = ports.p[0].rx_ring;
+ tx_ring = ports.p[pmsk].tx_ring;
+
+ n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail);
+ move(n_pkts, rx_ring, tx_ring);
+ }
+
+ if (pmsk != 0 && (pollfd[pmsk].revents & POLLIN) != 0 &&
+ (pollfd[0].revents & POLLOUT) != 0) {
+
+ rx_ring = ports.p[pmsk].rx_ring;
+ tx_ring = ports.p[0].tx_ring;
+
+ n_pkts = RTE_MIN(rx_ring->avail, tx_ring->avail);
+ move(n_pkts, rx_ring, tx_ring);
+ }
+ }
+
+ printf("Bridge stopped!\n");
+
+ for (i = 0; i != ports.num; i++) {
+ err = rte_netmap_ioctl(ports.p[i].fd, NIOCUNREGIF, &req);
+ if (err) {
+ printf("[E] NIOCUNREGIF ioctl failed (error %d)\n",
+ err);
+ }
+ else
+ printf("Port %hhu unregistered from Netmap mode\n", ports.p[i].id);
+
+ rte_netmap_close(ports.p[i].fd);
+ }
+ return 0;
+}
diff --git a/examples/netmap_compat/lib/compat_netmap.c b/examples/netmap_compat/lib/compat_netmap.c
new file mode 100644
index 00000000..bf1b418a
--- /dev/null
+++ b/examples/netmap_compat/lib/compat_netmap.c
@@ -0,0 +1,908 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <net/if.h>
+#include <sys/types.h>
+#include <sys/resource.h>
+#include <sys/mman.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_memzone.h>
+#include <rte_spinlock.h>
+#include <rte_string_fns.h>
+
+#include "compat_netmap.h"
+
+struct netmap_port {
+ struct rte_mempool *pool;
+ struct netmap_if *nmif;
+ struct rte_eth_conf eth_conf;
+ struct rte_eth_txconf tx_conf;
+ struct rte_eth_rxconf rx_conf;
+ int32_t socket_id;
+ uint16_t nr_tx_rings;
+ uint16_t nr_rx_rings;
+ uint32_t nr_tx_slots;
+ uint32_t nr_rx_slots;
+ uint16_t tx_burst;
+ uint16_t rx_burst;
+ uint32_t fd;
+};
+
+struct fd_port {
+ uint32_t port;
+};
+
+#ifndef POLLRDNORM
+#define POLLRDNORM 0x0040
+#endif
+
+#ifndef POLLWRNORM
+#define POLLWRNORM 0x0100
+#endif
+
+#define FD_PORT_FREE UINT32_MAX
+#define FD_PORT_RSRV (FD_PORT_FREE - 1)
+
+struct netmap_state {
+ struct rte_netmap_conf conf;
+ uintptr_t buf_start;
+ void *mem;
+ uint32_t mem_sz;
+ uint32_t netif_memsz;
+};
+
+
+#define COMPAT_NETMAP_MAX_NOFILE (2 * RTE_MAX_ETHPORTS)
+#define COMPAT_NETMAP_MAX_BURST 64
+#define COMPAT_NETMAP_MAX_PKT_PER_SYNC (2 * COMPAT_NETMAP_MAX_BURST)
+
+static struct netmap_port ports[RTE_MAX_ETHPORTS];
+static struct netmap_state netmap;
+
+static struct fd_port fd_port[COMPAT_NETMAP_MAX_NOFILE];
+static const int next_fd_start = RLIMIT_NOFILE + 1;
+static rte_spinlock_t netmap_lock;
+
+#define IDX_TO_FD(x) ((x) + next_fd_start)
+#define FD_TO_IDX(x) ((x) - next_fd_start)
+#define FD_VALID(x) ((x) >= next_fd_start && \
+ (x) < (typeof (x))(RTE_DIM(fd_port) + next_fd_start))
+
+#define PORT_NUM_RINGS (2 * netmap.conf.max_rings)
+#define PORT_NUM_SLOTS (PORT_NUM_RINGS * netmap.conf.max_slots)
+
+#define BUF_IDX(port, ring, slot) \
+ (((port) * PORT_NUM_RINGS + (ring)) * netmap.conf.max_slots + \
+ (slot))
+
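+/*
+ * Byte offset, from the start of a netmap_if, of ring number "rid", given
+ * the total number of rings in the interface and the cumulative number of
+ * slots in the rings that precede it.
+ */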
+#define NETMAP_IF_RING_OFS(rid, rings, slots) ({\
+ struct netmap_if *_if; \
+ struct netmap_ring *_rg; \
+ sizeof(*_if) + \
+ (rings) * sizeof(_if->ring_ofs[0]) + \
+ (rid) * sizeof(*_rg) + \
+ (slots) * sizeof(_rg->slot[0]); \
+ })
+
+static void netmap_unregif(uint32_t idx, uint32_t port);
+
+
+static int32_t
+ifname_to_portid(const char *ifname, uint8_t *port)
+{
+ char *endptr;
+ uint64_t portid;
+
+ errno = 0;
+ portid = strtoul(ifname, &endptr, 10);
+ if (endptr == ifname || *endptr != '\0' ||
+ portid >= RTE_DIM(ports) || errno != 0)
+ return -EINVAL;
+
+ *port = (uint8_t)portid;
+ return 0;
+}
+
+/**
+ * Given a dpdk mbuf, fill in the Netmap slot in ring r and its associated
+ * buffer with the data held by the mbuf.
+ * Note that mbuf chains are not supported.
+ */
+static void
+mbuf_to_slot(struct rte_mbuf *mbuf, struct netmap_ring *r, uint32_t index)
+{
+ char *data;
+ uint16_t length;
+
+ data = rte_pktmbuf_mtod(mbuf, char *);
+ length = rte_pktmbuf_data_len(mbuf);
+
+ if (length > r->nr_buf_size)
+ length = 0;
+
+ r->slot[index].len = length;
+ rte_memcpy(NETMAP_BUF(r, r->slot[index].buf_idx), data, length);
+}
+
+/**
+ * Given a Netmap ring and a slot index for that ring, construct a dpdk mbuf
+ * from the data held in the buffer associated with the slot.
+ * Allocation/deallocation of the dpdk mbuf are the responsibility of the
+ * caller.
+ * Note that mbuf chains are not supported.
+ */
+static void
+slot_to_mbuf(struct netmap_ring *r, uint32_t index, struct rte_mbuf *mbuf)
+{
+ char *data;
+ uint16_t length;
+
+ rte_pktmbuf_reset(mbuf);
+ length = r->slot[index].len;
+ data = rte_pktmbuf_append(mbuf, length);
+
+ if (data != NULL)
+ rte_memcpy(data, NETMAP_BUF(r, r->slot[index].buf_idx), length);
+}
+
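+/*
+ * Reserve the first free entry in the fd_port[] table and return the
+ * corresponding fake file descriptor, or -ENOMEM if the table is full.
+ */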
+static int32_t
+fd_reserve(void)
+{
+ uint32_t i;
+
+ for (i = 0; i != RTE_DIM(fd_port) && fd_port[i].port != FD_PORT_FREE;
+ i++)
+ ;
+
+ if (i == RTE_DIM(fd_port))
+ return -ENOMEM;
+
+ fd_port[i].port = FD_PORT_RSRV;
+ return IDX_TO_FD(i);
+}
+
+static int32_t
+fd_release(int32_t fd)
+{
+ uint32_t idx, port;
+
+ idx = FD_TO_IDX(fd);
+
+ if (!FD_VALID(fd) || (port = fd_port[idx].port) == FD_PORT_FREE)
+ return -EINVAL;
+
+ /* if we still have a valid port attached, release the port */
+ if (port < RTE_DIM(ports) && ports[port].fd == idx) {
+ netmap_unregif(idx, port);
+ }
+
+ fd_port[idx].port = FD_PORT_FREE;
+ return 0;
+}
+
+static int
+check_nmreq(struct nmreq *req, uint8_t *port)
+{
+ int32_t rc;
+ uint8_t portid;
+
+ if (req == NULL)
+ return -EINVAL;
+
+ if (req->nr_version != NETMAP_API) {
+ req->nr_version = NETMAP_API;
+ return -EINVAL;
+ }
+
+ if ((rc = ifname_to_portid(req->nr_name, &portid)) != 0) {
+ RTE_LOG(ERR, USER1, "Invalid interface name:\"%s\" "
+ "in NIOCGINFO call\n", req->nr_name);
+ return rc;
+ }
+
+ if (ports[portid].pool == NULL) {
+ RTE_LOG(ERR, USER1, "Misconfigured portid %hhu\n", portid);
+ return -EINVAL;
+ }
+
+ *port = portid;
+ return 0;
+}
+
+/**
+ * Simulate a Netmap NIOCGINFO ioctl: given a struct nmreq holding an interface
+ * name (a port number in our case), fill the struct nmreq in with advisory
+ * information about the interface: number of rings and their size, total memory
+ * required in the map, ...
+ * Those are preconfigured using rte_eth_{,tx,rx}conf and
+ * rte_netmap_port_conf structures
+ * and calls to rte_netmap_init_port() in the Netmap application.
+ */
+static int
+ioctl_niocginfo(__rte_unused int fd, void * param)
+{
+ uint8_t portid;
+ struct nmreq *req;
+ int32_t rc;
+
+ req = (struct nmreq *)param;
+ if ((rc = check_nmreq(req, &portid)) != 0)
+ return rc;
+
+ req->nr_tx_rings = (uint16_t)(ports[portid].nr_tx_rings - 1);
+ req->nr_rx_rings = (uint16_t)(ports[portid].nr_rx_rings - 1);
+ req->nr_tx_slots = ports[portid].nr_tx_slots;
+ req->nr_rx_slots = ports[portid].nr_rx_slots;
+
+ /* in the current implementation, all NETIFs share one memory region. */
+ req->nr_memsize = netmap.mem_sz;
+ req->nr_offset = 0;
+
+ return 0;
+}
+
+static void
+netmap_ring_setup(struct netmap_ring *ring, uint8_t port, uint32_t ringid,
+ uint32_t num_slots)
+{
+ uint32_t j;
+
+ ring->buf_ofs = netmap.buf_start - (uintptr_t)ring;
+ ring->num_slots = num_slots;
+ ring->cur = 0;
+ ring->reserved = 0;
+ ring->nr_buf_size = netmap.conf.max_bufsz;
+ ring->flags = 0;
+ ring->ts.tv_sec = 0;
+ ring->ts.tv_usec = 0;
+
+ for (j = 0; j < ring->num_slots; j++) {
+ ring->slot[j].buf_idx = BUF_IDX(port, ringid, j);
+ ring->slot[j].len = 0;
+ ring->flags = 0;
+ }
+}
+
+static int
+netmap_regif(struct nmreq *req, uint32_t idx, uint8_t port)
+{
+ struct netmap_if *nmif;
+ struct netmap_ring *ring;
+ uint32_t i, slots, start_ring;
+ int32_t rc;
+
+ if (ports[port].fd < RTE_DIM(fd_port)) {
+ RTE_LOG(ERR, USER1, "port %hhu already in use by fd: %u\n",
+ port, IDX_TO_FD(ports[port].fd));
+ return -EBUSY;
+ }
+ if (fd_port[idx].port != FD_PORT_RSRV) {
+ RTE_LOG(ERR, USER1, "fd: %u is misconfigured\n",
+ IDX_TO_FD(idx));
+ return -EBUSY;
+ }
+
+ nmif = ports[port].nmif;
+
+ /* setup netmap_if fields. */
+ memset(nmif, 0, netmap.netif_memsz);
+
+ /* only ALL rings supported right now. */
+ if (req->nr_ringid != 0)
+ return -EINVAL;
+
+ snprintf(nmif->ni_name, sizeof(nmif->ni_name), "%s", req->nr_name);
+ nmif->ni_version = req->nr_version;
+
+ /* Netmap uses ni_(r|t)x_rings + 1 */
+ nmif->ni_rx_rings = ports[port].nr_rx_rings - 1;
+ nmif->ni_tx_rings = ports[port].nr_tx_rings - 1;
+
+ /*
+ * Setup TX rings and slots.
+ * Refer to the comments in netmap.h for details
+ */
+
+ slots = 0;
+ for (i = 0; i < nmif->ni_tx_rings + 1; i++) {
+
+ nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
+ PORT_NUM_RINGS, slots);
+
+ ring = NETMAP_TXRING(nmif, i);
+ netmap_ring_setup(ring, port, i, ports[port].nr_tx_slots);
+ ring->avail = ring->num_slots;
+
+ slots += ports[port].nr_tx_slots;
+ }
+
+ /*
+ * Setup RX rings and slots.
+ * Refer to the comments in netmap.h for details
+ */
+
+ start_ring = i;
+
+ for (; i < nmif->ni_rx_rings + 1 + start_ring; i++) {
+
+ nmif->ring_ofs[i] = NETMAP_IF_RING_OFS(i,
+ PORT_NUM_RINGS, slots);
+
+ ring = NETMAP_RXRING(nmif, (i - start_ring));
+ netmap_ring_setup(ring, port, i, ports[port].nr_rx_slots);
+ ring->avail = 0;
+
+ slots += ports[port].nr_rx_slots;
+ }
+
+ if ((rc = rte_eth_dev_start(port)) < 0) {
+ RTE_LOG(ERR, USER1,
+ "Couldn't start ethernet device %s (error %d)\n",
+ req->nr_name, rc);
+ return rc;
+ }
+
+ /* set up the fd <--> port relationship. */
+ ports[port].fd = idx;
+ fd_port[idx].port = port;
+
+ req->nr_memsize = netmap.mem_sz;
+ req->nr_offset = (uintptr_t)nmif - (uintptr_t)netmap.mem;
+
+ return 0;
+}
+
+/**
+ * Simulate a Netmap NIOCREGIF ioctl: bind the file descriptor to the
+ * requested port and set up its netmap_if and ring structures.
+ */
+static int
+ioctl_niocregif(int32_t fd, void * param)
+{
+ uint8_t portid;
+ int32_t rc;
+ uint32_t idx;
+ struct nmreq *req;
+
+ req = (struct nmreq *)param;
+ if ((rc = check_nmreq(req, &portid)) != 0)
+ return rc;
+
+ idx = FD_TO_IDX(fd);
+
+ rte_spinlock_lock(&netmap_lock);
+ rc = netmap_regif(req, idx, portid);
+ rte_spinlock_unlock(&netmap_lock);
+
+ return rc;
+}
+
+static void
+netmap_unregif(uint32_t idx, uint32_t port)
+{
+ fd_port[idx].port = FD_PORT_RSRV;
+ ports[port].fd = UINT32_MAX;
+ rte_eth_dev_stop((uint8_t)port);
+}
+
+/**
+ * Simulate a Netmap NIOCUNREGIF ioctl: put an interface running in Netmap
+ * mode back in "normal" mode. In our case, we just stop the port associated
+ * with this file descriptor.
+ */
+static int
+ioctl_niocunregif(int fd)
+{
+ uint32_t idx, port;
+ int32_t rc;
+
+ idx = FD_TO_IDX(fd);
+
+ rte_spinlock_lock(&netmap_lock);
+
+ port = fd_port[idx].port;
+ if (port < RTE_DIM(ports) && ports[port].fd == idx) {
+ netmap_unregif(idx, port);
+ rc = 0;
+ } else {
+ RTE_LOG(ERR, USER1,
+ "%s: %d is not associated with valid port\n",
+ __func__, fd);
+ rc = -EINVAL;
+ }
+
+ rte_spinlock_unlock(&netmap_lock);
+ return rc;
+}
+
+/**
+ * A call to rx_sync_ring will try to fill a Netmap RX ring with as many
+ * packets as it can hold coming from its dpdk port.
+ */
+static inline int
+rx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number,
+ uint16_t max_burst)
+{
+ int32_t i, n_rx;
+ uint16_t burst_size;
+ uint32_t cur_slot, n_free_slots;
+ struct rte_mbuf *rx_mbufs[COMPAT_NETMAP_MAX_BURST];
+
+ n_free_slots = ring->num_slots - (ring->avail + ring->reserved);
+ n_free_slots = RTE_MIN(n_free_slots, max_burst);
+ cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);
+
+ while (n_free_slots) {
+ burst_size = (uint16_t)RTE_MIN(n_free_slots, RTE_DIM(rx_mbufs));
+
+ /* receive up to burst_size packets from the NIC's queue */
+ n_rx = rte_eth_rx_burst(port, ring_number, rx_mbufs,
+ burst_size);
+
+ if (n_rx == 0)
+ return 0;
+ if (unlikely(n_rx < 0))
+ return -1;
+
+ /* Put those n_rx packets in the Netmap structures */
+ for (i = 0; i < n_rx ; i++) {
+ mbuf_to_slot(rx_mbufs[i], ring, cur_slot);
+ rte_pktmbuf_free(rx_mbufs[i]);
+ cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
+ }
+
+ /* Update the Netmap ring structure to reflect the change */
+ ring->avail += n_rx;
+ n_free_slots -= n_rx;
+ }
+
+ return 0;
+}
+
+static inline int
+rx_sync_if(uint32_t port)
+{
+ uint16_t burst;
+ uint32_t i, rc;
+ struct netmap_if *nifp;
+ struct netmap_ring *r;
+
+ nifp = ports[port].nmif;
+ burst = ports[port].rx_burst;
+ rc = 0;
+
+ for (i = 0; i < nifp->ni_rx_rings + 1; i++) {
+ r = NETMAP_RXRING(nifp, i);
+ rx_sync_ring(r, (uint8_t)port, (uint16_t)i, burst);
+ rc += r->avail;
+ }
+
+ return rc;
+}
+
+/**
+ * Simulate a Netmap NIOCRXSYNC ioctl: pull newly received packets from the
+ * port into its netmap RX rings.
+ */
+static int
+ioctl_niocrxsync(int fd)
+{
+ uint32_t idx, port;
+
+ idx = FD_TO_IDX(fd);
+ if ((port = fd_port[idx].port) < RTE_DIM(ports) &&
+ ports[port].fd == idx) {
+ return rx_sync_if(fd_port[idx].port);
+ } else {
+ return -EINVAL;
+ }
+}
+
+/**
+ * A call to tx_sync_ring will try to empty a Netmap TX ring by converting its
+ * buffers into rte_mbufs and sending them out on the ring's dpdk port.
+ */
+static int
+tx_sync_ring(struct netmap_ring *ring, uint8_t port, uint16_t ring_number,
+ struct rte_mempool *pool, uint16_t max_burst)
+{
+ uint32_t i, n_tx;
+ uint16_t burst_size;
+ uint32_t cur_slot, n_used_slots;
+ struct rte_mbuf *tx_mbufs[COMPAT_NETMAP_MAX_BURST];
+
+ n_used_slots = ring->num_slots - ring->avail;
+ n_used_slots = RTE_MIN(n_used_slots, max_burst);
+ cur_slot = (ring->cur + ring->avail) & (ring->num_slots - 1);
+
+ while (n_used_slots) {
+ burst_size = (uint16_t)RTE_MIN(n_used_slots, RTE_DIM(tx_mbufs));
+
+ for (i = 0; i < burst_size; i++) {
+ tx_mbufs[i] = rte_pktmbuf_alloc(pool);
+ if (tx_mbufs[i] == NULL)
+ goto err;
+
+ slot_to_mbuf(ring, cur_slot, tx_mbufs[i]);
+ cur_slot = NETMAP_RING_NEXT(ring, cur_slot);
+ }
+
+ n_tx = rte_eth_tx_burst(port, ring_number, tx_mbufs,
+ burst_size);
+
+ /* Update the Netmap ring structure to reflect the change */
+ ring->avail += n_tx;
+ n_used_slots -= n_tx;
+
+ /* Return the mbufs that failed to transmit to their pool */
+ if (unlikely(n_tx != burst_size)) {
+ for (i = n_tx; i < burst_size; i++)
+ rte_pktmbuf_free(tx_mbufs[i]);
+ break;
+ }
+ }
+
+ return 0;
+
+err:
+ for (; i != 0; i--)
+ rte_pktmbuf_free(tx_mbufs[i - 1]);
+
+ RTE_LOG(ERR, USER1,
+ "Couldn't get mbuf from mempool is the mempool too small?\n");
+ return -1;
+}
+
+static int
+tx_sync_if(uint32_t port)
+{
+ uint16_t burst;
+ uint32_t i, rc;
+ struct netmap_if *nifp;
+ struct netmap_ring *r;
+ struct rte_mempool *mp;
+
+ nifp = ports[port].nmif;
+ mp = ports[port].pool;
+ burst = ports[port].tx_burst;
+ rc = 0;
+
+ for (i = 0; i < nifp->ni_tx_rings + 1; i++) {
+ r = NETMAP_TXRING(nifp, i);
+ tx_sync_ring(r, (uint8_t)port, (uint16_t)i, mp, burst);
+ rc += r->avail;
+ }
+
+ return rc;
+}
+
+/**
+ * Simulate a Netmap NIOCTXSYNC ioctl: transmit the packets pending in the
+ * port's netmap TX rings.
+ */
+static inline int
+ioctl_nioctxsync(int fd)
+{
+ uint32_t idx, port;
+
+ idx = FD_TO_IDX(fd);
+ if ((port = fd_port[idx].port) < RTE_DIM(ports) &&
+ ports[port].fd == idx) {
+ return tx_sync_if(fd_port[idx].port);
+ } else {
+ return -EINVAL;
+ }
+}
+
+/**
+ * Initialise the library: allocate the single shared memory region that
+ * holds the netmap_if/netmap_ring structures and packet buffers for all
+ * ports, and mark every port and fake file descriptor as unused.
+ */
+int
+rte_netmap_init(const struct rte_netmap_conf *conf)
+{
+ size_t buf_ofs, nmif_sz, sz;
+ size_t port_rings, port_slots, port_bufs;
+ uint32_t i, port_num;
+
+ port_num = RTE_MAX_ETHPORTS;
+ port_rings = 2 * conf->max_rings;
+ port_slots = port_rings * conf->max_slots;
+ port_bufs = port_slots;
+
+ nmif_sz = NETMAP_IF_RING_OFS(port_rings, port_rings, port_slots);
+ sz = nmif_sz * port_num;
+
+ buf_ofs = RTE_ALIGN_CEIL(sz, RTE_CACHE_LINE_SIZE);
+ sz = buf_ofs + port_bufs * conf->max_bufsz * port_num;
+
+ if (sz > UINT32_MAX ||
+ (netmap.mem = rte_zmalloc_socket(__func__, sz,
+ RTE_CACHE_LINE_SIZE, conf->socket_id)) == NULL) {
+ RTE_LOG(ERR, USER1, "%s: failed to allocate %zu bytes\n",
+ __func__, sz);
+ return -ENOMEM;
+ }
+
+ netmap.mem_sz = sz;
+ netmap.netif_memsz = nmif_sz;
+ netmap.buf_start = (uintptr_t)netmap.mem + buf_ofs;
+ netmap.conf = *conf;
+
+ rte_spinlock_init(&netmap_lock);
+
+ /* Mark all ports as unused and set NETIF pointer. */
+ for (i = 0; i != RTE_DIM(ports); i++) {
+ ports[i].fd = UINT32_MAX;
+ ports[i].nmif = (struct netmap_if *)
+ ((uintptr_t)netmap.mem + nmif_sz * i);
+ }
+
+ /* Mark all fd_ports as unused. */
+ for (i = 0; i != RTE_DIM(fd_port); i++) {
+ fd_port[i].port = FD_PORT_FREE;
+ }
+
+ return 0;
+}
+
+
+int
+rte_netmap_init_port(uint8_t portid, const struct rte_netmap_port_conf *conf)
+{
+ int32_t ret;
+ uint16_t i;
+ uint16_t rx_slots, tx_slots;
+
+ if (conf == NULL ||
+ portid >= RTE_DIM(ports) ||
+ conf->nr_tx_rings > netmap.conf.max_rings ||
+ conf->nr_rx_rings > netmap.conf.max_rings) {
+ RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n",
+ __func__, portid);
+ return -EINVAL;
+ }
+
+ rx_slots = (uint16_t)rte_align32pow2(conf->nr_rx_slots);
+ tx_slots = (uint16_t)rte_align32pow2(conf->nr_tx_slots);
+
+ if (tx_slots > netmap.conf.max_slots ||
+ rx_slots > netmap.conf.max_slots) {
+ RTE_LOG(ERR, USER1, "%s(%hhu): invalid parameters\n",
+ __func__, portid);
+ return -EINVAL;
+ }
+
+ ret = rte_eth_dev_configure(portid, conf->nr_rx_rings,
+ conf->nr_tx_rings, conf->eth_conf);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, USER1, "Couldn't configure port %hhu\n", portid);
+ return ret;
+ }
+
+ for (i = 0; i < conf->nr_tx_rings; i++) {
+ ret = rte_eth_tx_queue_setup(portid, i, tx_slots,
+ conf->socket_id, NULL);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, USER1,
+ "Couldn't configure TX queue %"PRIu16" of "
+ "port %"PRIu8"\n",
+ i, portid);
+ return ret;
+ }
+
+ ret = rte_eth_rx_queue_setup(portid, i, rx_slots,
+ conf->socket_id, NULL, conf->pool);
+
+ if (ret < 0) {
+ RTE_LOG(ERR, USER1,
+ "Couldn't configure RX queue %"PRIu16" of "
+ "port %"PRIu8"\n",
+ i, portid);
+ return ret;
+ }
+ }
+
+ /* copy config to the private storage. */
+ ports[portid].eth_conf = conf->eth_conf[0];
+ ports[portid].pool = conf->pool;
+ ports[portid].socket_id = conf->socket_id;
+ ports[portid].nr_tx_rings = conf->nr_tx_rings;
+ ports[portid].nr_rx_rings = conf->nr_rx_rings;
+ ports[portid].nr_tx_slots = tx_slots;
+ ports[portid].nr_rx_slots = rx_slots;
+ ports[portid].tx_burst = conf->tx_burst;
+ ports[portid].rx_burst = conf->rx_burst;
+
+ return 0;
+}
+
+int
+rte_netmap_close(__rte_unused int fd)
+{
+ int32_t rc;
+
+ rte_spinlock_lock(&netmap_lock);
+ rc = fd_release(fd);
+ rte_spinlock_unlock(&netmap_lock);
+
+ if (rc < 0) {
+ errno = -rc;
+ rc = -1;
+ }
+ return rc;
+}
+
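+/*
+ * Dispatch an emulated netmap ioctl on a fake file descriptor. On failure,
+ * errno is set from the handler's negative return code and -1 is returned.
+ */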
+int rte_netmap_ioctl(int fd, uint32_t op, void *param)
+{
+ int ret;
+
+ if (!FD_VALID(fd)) {
+ errno = EBADF;
+ return -1;
+ }
+
+ switch (op) {
+
+ case NIOCGINFO:
+ ret = ioctl_niocginfo(fd, param);
+ break;
+
+ case NIOCREGIF:
+ ret = ioctl_niocregif(fd, param);
+ break;
+
+ case NIOCUNREGIF:
+ ret = ioctl_niocunregif(fd);
+ break;
+
+ case NIOCRXSYNC:
+ ret = ioctl_niocrxsync(fd);
+ break;
+
+ case NIOCTXSYNC:
+ ret = ioctl_nioctxsync(fd);
+ break;
+
+ default:
+ ret = -ENOTTY;
+ }
+
+ if (ret < 0) {
+ errno = -ret;
+ ret = -1;
+ } else {
+ ret = 0;
+ }
+
+ return ret;
+}
+
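+/*
+ * Simulate mmap() on a netmap fd: validate the request and return a pointer
+ * into the single pre-allocated shared region; no new mapping is created.
+ */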
+void *
+rte_netmap_mmap(void *addr, size_t length,
+ int prot, int flags, int fd, off_t offset)
+{
+ static const int cprot = PROT_WRITE | PROT_READ;
+
+ if (!FD_VALID(fd) || length + offset > netmap.mem_sz ||
+ (prot & cprot) != cprot ||
+ ((flags & MAP_FIXED) != 0 && addr != NULL)) {
+
+ errno = EINVAL;
+ return MAP_FAILED;
+ }
+
+ return (void *)((uintptr_t)netmap.mem + (uintptr_t)offset);
+}
+
+/**
+ * Return a "fake" file descriptor with a value above RLIMIT_NOFILE so that
+ * any attempt to use that file descriptor with the usual API will fail.
+ */
+int
+rte_netmap_open(__rte_unused const char *pathname, __rte_unused int flags)
+{
+ int fd;
+
+ rte_spinlock_lock(&netmap_lock);
+ fd = fd_reserve();
+ rte_spinlock_unlock(&netmap_lock);
+
+ if (fd < 0) {
+ errno = -fd;
+ fd = -1;
+ }
+ return fd;
+}
+
+/**
+ * Only a timeout of 0 (poll once) or an infinite timeout (any negative
+ * value) is supported.
+ */
+int
+rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout)
+{
+ int32_t count_it, ret;
+ uint32_t i, idx, port;
+ uint32_t want_rx, want_tx;
+
+ ret = 0;
+ do {
+ for (i = 0; i < nfds; i++) {
+
+ count_it = 0;
+
+ if (!FD_VALID(fds[i].fd) || fds[i].events == 0) {
+ fds[i].revents = 0;
+ continue;
+ }
+
+ idx = FD_TO_IDX(fds[i].fd);
+ if ((port = fd_port[idx].port) >= RTE_DIM(ports) ||
+ ports[port].fd != idx) {
+
+ fds[i].revents |= POLLERR;
+ ret++;
+ continue;
+ }
+
+ want_rx = fds[i].events & (POLLIN | POLLRDNORM);
+ want_tx = fds[i].events & (POLLOUT | POLLWRNORM);
+
+ if (want_rx && rx_sync_if(port) > 0) {
+ fds[i].revents = (uint16_t)
+ (fds[i].revents | want_rx);
+ count_it = 1;
+ }
+ if (want_tx && tx_sync_if(port) > 0) {
+ fds[i].revents = (uint16_t)
+ (fds[i].revents | want_tx);
+ count_it = 1;
+ }
+
+ ret += count_it;
+ }
+ }
+ while ((ret == 0 && timeout < 0) || timeout);
+
+ return ret;
+}
diff --git a/examples/netmap_compat/lib/compat_netmap.h b/examples/netmap_compat/lib/compat_netmap.h
new file mode 100644
index 00000000..3dc7a2f4
--- /dev/null
+++ b/examples/netmap_compat/lib/compat_netmap.h
@@ -0,0 +1,80 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _RTE_COMPAT_NETMAP_H_
+#define _RTE_COMPAT_NETMAP_H_
+
+#include <poll.h>
+#include <linux/ioctl.h>
+#include <net/if.h>
+
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+
+#include "netmap.h"
+#include "netmap_user.h"
+
+/**
+ * One can override Netmap macros here as needed
+ */
+
+struct rte_netmap_conf {
+ int32_t socket_id;
+ uint32_t max_rings; /* number of rings (queues) per netmap_if (port). */
+ uint32_t max_slots; /* number of slots (descriptors) per netmap ring. */
+ uint16_t max_bufsz; /* size of each netmap buffer. */
+};
+
+struct rte_netmap_port_conf {
+ struct rte_eth_conf *eth_conf;
+ struct rte_mempool *pool;
+ int32_t socket_id;
+ uint16_t nr_tx_rings;
+ uint16_t nr_rx_rings;
+ uint32_t nr_tx_slots;
+ uint32_t nr_rx_slots;
+ uint16_t tx_burst;
+ uint16_t rx_burst;
+};
+
+int rte_netmap_init(const struct rte_netmap_conf *conf);
+int rte_netmap_init_port(uint8_t portid,
+ const struct rte_netmap_port_conf *conf);
+
+int rte_netmap_close(int fd);
+int rte_netmap_ioctl(int fd, uint32_t op, void *param);
+int rte_netmap_open(const char *pathname, int flags);
+int rte_netmap_poll(struct pollfd *fds, nfds_t nfds, int timeout);
+void *rte_netmap_mmap(void *addr, size_t length, int prot, int flags, int fd,
+ off_t offset);
+
+#endif /* _RTE_COMPAT_NETMAP_H_ */
diff --git a/examples/netmap_compat/netmap/netmap.h b/examples/netmap_compat/netmap/netmap.h
new file mode 100644
index 00000000..677c8a9f
--- /dev/null
+++ b/examples/netmap_compat/netmap/netmap.h
@@ -0,0 +1,289 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. Neither the name of the authors nor the names of their contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap.h 231198 2012-02-08 11:43:29Z luigi $
+ * $Id: netmap.h 10879 2012-04-12 22:48:59Z luigi $
+ *
+ * Definitions of constants and the structures used by the netmap
+ * framework, for the part visible to both kernel and userspace.
+ * Detailed info on netmap is available with "man netmap" or at
+ *
+ * http://info.iet.unipi.it/~luigi/netmap/
+ */
+
+#ifndef _NET_NETMAP_H_
+#define _NET_NETMAP_H_
+
+/*
+ * --- Netmap data structures ---
+ *
+ * The data structures used by netmap are shown below. Those in
+ * capital letters are in an mmap()ed area shared with userspace,
+ * while others are private to the kernel.
+ * Shared structures do not contain pointers but only memory
+ * offsets, so that addressing is portable between kernel and userspace.
+
+
+ softc
++----------------+
+| standard fields|
+| if_pspare[0] ----------+
++----------------+ |
+ |
++----------------+<------+
+|(netmap_adapter)|
+| | netmap_kring
+| tx_rings *--------------------------------->+---------------+
+| | netmap_kring | ring *---------.
+| rx_rings *--------->+---------------+ | nr_hwcur | |
++----------------+ | ring *--------. | nr_hwavail | V
+ | nr_hwcur | | | selinfo | |
+ | nr_hwavail | | +---------------+ .
+ | selinfo | | | ... | .
+ +---------------+ | |(ntx+1 entries)|
+ | .... | | | |
+ |(nrx+1 entries)| | +---------------+
+ | | |
+ KERNEL +---------------+ |
+ |
+ ====================================================================
+ |
+ USERSPACE | NETMAP_RING
+ +---->+-------------+
+ / | cur |
+ NETMAP_IF (nifp, one per file desc.) / | avail |
+ +---------------+ / | buf_ofs |
+ | ni_tx_rings | / +=============+
+ | ni_rx_rings | / | buf_idx | slot[0]
+ | | / | len, flags |
+ | | / +-------------+
+ +===============+ / | buf_idx | slot[1]
+ | txring_ofs[0] | (rel.to nifp)--' | len, flags |
+ | txring_ofs[1] | +-------------+
+ (num_rings+1 entries) (nr_num_slots entries)
+ | txring_ofs[n] | | buf_idx | slot[n-1]
+ +---------------+ | len, flags |
+ | rxring_ofs[0] | +-------------+
+ | rxring_ofs[1] |
+ (num_rings+1 entries)
+ | txring_ofs[n] |
+ +---------------+
+
+ * The private descriptor ('softc' or 'adapter') of each interface
+ * is extended with a "struct netmap_adapter" containing netmap-related
+ * info (see the description in dev/netmap/netmap_kernel.h).
+ * Among other things, tx_rings and rx_rings point to the arrays of
+ * "struct netmap_kring" which in turn reache the various
+ * "struct netmap_ring", shared with userspace.
+
+ * The NETMAP_RING is the userspace-visible replica of the NIC ring.
+ * Each slot has the index of a buffer, its length and some flags.
+ * In user space, the buffer address is computed as
+ * (char *)ring + buf_ofs + index*NETMAP_BUF_SIZE
+ * In the kernel, buffers do not necessarily need to be contiguous,
+ * and the virtual and physical addresses are derived through
+ * a lookup table.
+ * To associate a different buffer to a slot, applications must
+ * write the new index in buf_idx, and set NS_BUF_CHANGED flag to
+ * make sure that the kernel updates the hardware ring as needed.
+ *
+ * Normally the driver is not requested to report the result of
+ * transmissions (this can dramatically speed up operation).
+ * However the user may request to report completion by setting
+ * NS_REPORT.
+ */
+struct netmap_slot {
+ uint32_t buf_idx; /* buffer index */
+ uint16_t len; /* packet length, to be copied to/from the hw ring */
+ uint16_t flags; /* buf changed, etc. */
+#define NS_BUF_CHANGED 0x0001 /* must resync the map, buffer changed */
+#define NS_REPORT 0x0002 /* ask the hardware to report results
+ * e.g. by generating an interrupt
+ */
+};
+
+/*
+ * Netmap representation of a TX or RX ring (also known as "queue").
+ * This is a queue implemented as a fixed-size circular array.
+ * At the software level, two fields are important: avail and cur.
+ *
+ * In TX rings:
+ * avail indicates the number of slots available for transmission.
+ * It is updated by the kernel after every netmap system call.
+ * It MUST BE decremented by the application when it appends a
+ * packet.
+ * cur indicates the slot to use for the next packet
+ * to send (i.e. the "tail" of the queue).
+ * It MUST BE incremented by the application before
+ * netmap system calls to reflect the number of newly
+ * sent packets.
+ * It is checked by the kernel on netmap system calls
+ * (normally unmodified by the kernel unless invalid).
+ *
+ * The kernel side of netmap uses two additional fields in its own
+ * private ring structure, netmap_kring:
+ * nr_hwcur is a copy of nr_cur on an NIOCTXSYNC.
+ * nr_hwavail is the number of slots known as available by the
+ * hardware. It is updated on an INTR (inc by the
+ * number of packets sent) and on a NIOCTXSYNC
+ * (decrease by nr_cur - nr_hwcur)
+ * A special case, nr_hwavail is -1 if the transmit
+ * side is idle (no pending transmits).
+ *
+ * In RX rings:
+ * avail is the number of packets available (possibly 0).
+ * It MUST BE decremented by the application when it consumes
+ * a packet, and it is updated to nr_hwavail on a NIOCRXSYNC
+ * cur indicates the first slot that contains a packet not
+ * processed yet (the "head" of the queue).
+ * It MUST BE incremented by the software when it consumes
+ * a packet.
+ * reserved indicates the number of buffers before 'cur'
+ * that the application has still in use. Normally 0,
+ * it MUST BE incremented by the application when it
+ * does not return the buffer immediately, and decremented
+ * when the buffer is finally freed.
+ *
+ * The kernel side of netmap uses two additional fields in the kring:
+ * nr_hwcur is a copy of nr_cur on an NIOCRXSYNC
+ * nr_hwavail is the number of packets available. It is updated
+ * on INTR (inc by the number of new packets arrived)
+ * and on NIOCRXSYNC (decreased by nr_cur - nr_hwcur).
+ *
+ * DATA OWNERSHIP/LOCKING:
+ * The netmap_ring is owned by the user program and it is only
+ * accessed or modified in the upper half of the kernel during
+ * a system call.
+ *
+ * The netmap_kring is only modified by the upper half of the kernel.
+ */
+struct netmap_ring {
+ /*
+ * nr_buf_base_ofs is meant to be used through macros.
+ * It contains the offset of the buffer region from this
+ * descriptor.
+ */
+ ssize_t buf_ofs;
+ uint32_t num_slots; /* number of slots in the ring. */
+ uint32_t avail; /* number of usable slots */
+ uint32_t cur; /* 'current' r/w position */
+ uint32_t reserved; /* not refilled before current */
+
+ uint16_t nr_buf_size;
+ uint16_t flags;
+#define NR_TIMESTAMP 0x0002 /* set timestamp on *sync() */
+
+ struct timeval ts; /* time of last *sync() */
+
+ /* the slots follow. This struct has variable size */
+ struct netmap_slot slot[0]; /* array of slots. */
+};
+
+
+/*
+ * Netmap representation of an interface and its queue(s).
+ * There is one netmap_if for each file descriptor on which we want
+ * to select/poll. We assume that each interface has the same number
+ * of receive and transmit queues.
+ * select/poll operates on one or all pairs depending on the value of
+ * nmr_queueid passed on the ioctl.
+ */
+struct netmap_if {
+ char ni_name[IFNAMSIZ]; /* name of the interface. */
+ u_int ni_version; /* API version, currently unused */
+ u_int ni_rx_rings; /* number of rx rings */
+ u_int ni_tx_rings; /* if zero, same as ni_rx_rings */
+ /*
+ * The following array contains the offset of each netmap ring
+ * from this structure. The first ni_tx_queues+1 entries refer
+ * to the tx rings, the next ni_rx_queues+1 refer to the rx rings
+ * (the last entry in each block refers to the host stack rings).
+ * The area is filled up by the kernel on NIOCREG,
+ * and then only read by userspace code.
+ */
+ ssize_t ring_ofs[0];
+};
+
+#ifndef NIOCREGIF
+/*
+ * ioctl names and related fields
+ *
+ * NIOCGINFO takes a struct ifreq, the interface name is the input,
+ * the outputs are the number of queues and the number of descriptors
+ * for each queue (useful to set number of threads etc.).
+ *
+ * NIOCREGIF takes an interface name within a struct ifreq,
+ * and activates netmap mode on the interface (if possible).
+ *
+ * NIOCUNREGIF unregisters the interface associated to the fd.
+ *
+ * NIOCTXSYNC, NIOCRXSYNC synchronize tx or rx queues,
+ * whose identity is set in NIOCREGIF through nr_ringid
+ */
+
+/*
+ * struct nmreq overlays a struct ifreq
+ */
+struct nmreq {
+ char nr_name[IFNAMSIZ];
+ uint32_t nr_version; /* API version */
+#define NETMAP_API 3 /* current version */
+ uint32_t nr_offset; /* nifp offset in the shared region */
+ uint32_t nr_memsize; /* size of the shared region */
+ uint32_t nr_tx_slots; /* slots in tx rings */
+ uint32_t nr_rx_slots; /* slots in rx rings */
+ uint16_t nr_tx_rings; /* number of tx rings */
+ uint16_t nr_rx_rings; /* number of rx rings */
+ uint16_t nr_ringid; /* ring(s) we care about */
+#define NETMAP_HW_RING 0x4000 /* low bits indicate one hw ring */
+#define NETMAP_SW_RING 0x2000 /* process the sw ring */
+#define NETMAP_NO_TX_POLL 0x1000 /* no automatic txsync on poll */
+#define NETMAP_RING_MASK 0xfff /* the ring number */
+ uint16_t spare1;
+ uint32_t spare2[4];
+};
+
+/*
+ * FreeBSD uses the size value embedded in the _IOWR to determine
+ * how much to copy in/out. So we need it to match the actual
+ * data structure we pass. We put some spares in the structure
+ * to ease compatibility with other versions
+ */
+#define NIOCGINFO _IOWR('i', 145, struct nmreq) /* return IF info */
+#define NIOCREGIF _IOWR('i', 146, struct nmreq) /* interface register */
+#define NIOCUNREGIF _IO('i', 147) /* interface unregister */
+#define NIOCTXSYNC _IO('i', 148) /* sync tx queues */
+#define NIOCRXSYNC _IO('i', 149) /* sync rx queues */
+#endif /* !NIOCREGIF */
+
+#endif /* _NET_NETMAP_H_ */
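
The comment blocks in this header describe how an application swaps buffers into slots and requests completion reports. The short sketch below is not part of the imported file; the helper name is hypothetical, and it only uses the netmap_slot fields and NS_* flags defined above.

/*
 * Minimal sketch: point a TX slot at a different buffer and request a
 * completion report.  new_idx must be the index of a spare netmap buffer.
 */
static void
swap_tx_buffer(struct netmap_ring *ring, uint32_t slot_no,
	       uint32_t new_idx, uint16_t len)
{
	struct netmap_slot *slot = &ring->slot[slot_no];

	slot->buf_idx = new_idx;	/* attach a different buffer */
	slot->len = len;		/* length of the frame to send */
	slot->flags |= NS_BUF_CHANGED;	/* kernel must resync the mapping */
	slot->flags |= NS_REPORT;	/* ask the hardware to report completion */
}
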
diff --git a/examples/netmap_compat/netmap/netmap_user.h b/examples/netmap_compat/netmap/netmap_user.h
new file mode 100644
index 00000000..f369592e
--- /dev/null
+++ b/examples/netmap_compat/netmap/netmap_user.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (C) 2011 Matteo Landi, Luigi Rizzo. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. Neither the name of the authors nor the names of their contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY MATTEO LANDI AND CONTRIBUTORS "AS IS" AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL MATTEO LANDI OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+ * THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * $FreeBSD: head/sys/net/netmap_user.h 231198 2012-02-08 11:43:29Z luigi $
+ * $Id: netmap_user.h 10879 2012-04-12 22:48:59Z luigi $
+ *
+ * This header contains the macros used to manipulate netmap structures
+ * and packets in userspace. See netmap(4) for more information.
+ *
+ * The address of the struct netmap_if, say nifp, is computed from the
+ * value returned from ioctl(.., NIOCREG, ...) and the mmap region:
+ * ioctl(fd, NIOCREG, &req);
+ * mem = mmap(0, ... );
+ * nifp = NETMAP_IF(mem, req.nr_nifp);
+ * (so simple, we could just do it manually)
+ *
+ * From there:
+ * struct netmap_ring *NETMAP_TXRING(nifp, index)
+ * struct netmap_ring *NETMAP_RXRING(nifp, index)
+ * we can access ring->nr_cur, ring->nr_avail, ring->nr_flags
+ *
+ * ring->slot[i] gives us the i-th slot (we can access
+ * directly plen, flags, bufindex)
+ *
+ * char *buf = NETMAP_BUF(ring, index) returns a pointer to
+ * the i-th buffer
+ *
+ * Since rings are circular, we have macros to compute the next index
+ * i = NETMAP_RING_NEXT(ring, i);
+ */
+
+#ifndef _NET_NETMAP_USER_H_
+#define _NET_NETMAP_USER_H_
+
+#define NETMAP_IF(b, o) (struct netmap_if *)((char *)(b) + (o))
+
+#define NETMAP_TXRING(nifp, index) \
+ ((struct netmap_ring *)((char *)(nifp) + \
+ (nifp)->ring_ofs[index] ) )
+
+#define NETMAP_RXRING(nifp, index) \
+ ((struct netmap_ring *)((char *)(nifp) + \
+ (nifp)->ring_ofs[index + (nifp)->ni_tx_rings + 1] ) )
+
+#define NETMAP_BUF(ring, index) \
+ ((char *)(ring) + (ring)->buf_ofs + ((index)*(ring)->nr_buf_size))
+
+#define NETMAP_BUF_IDX(ring, buf) \
+ ( ((char *)(buf) - ((char *)(ring) + (ring)->buf_ofs) ) / \
+ (ring)->nr_buf_size )
+
+#define NETMAP_RING_NEXT(r, i) \
+ ((i)+1 == (r)->num_slots ? 0 : (i) + 1 )
+
+#define NETMAP_RING_FIRST_RESERVED(r) \
+ ( (r)->cur < (r)->reserved ? \
+ (r)->cur + (r)->num_slots - (r)->reserved : \
+ (r)->cur - (r)->reserved )
+
+/*
+ * Return 1 if the given tx ring is empty.
+ */
+#define NETMAP_TX_RING_EMPTY(r) ((r)->avail >= (r)->num_slots - 1)
+
+#endif /* _NET_NETMAP_USER_H_ */
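
The header comment above outlines the register/mmap/transmit sequence. A minimal sketch of that sequence, under the assumptions stated in its leading comment, could look as follows; it is not part of the imported file and the helper name is hypothetical.

/*
 * Register an interface, map the shared region, place one frame in the
 * first TX ring and ask the kernel to push it.  fd is assumed to be a
 * descriptor opened on the netmap control device; error handling and the
 * poll()/retry loop are omitted.
 */
#include <stdint.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <net/if.h>

#include "netmap.h"
#include "netmap_user.h"

static int
netmap_send_one(int fd, const char *ifname, const void *frame, uint16_t len)
{
	struct nmreq req;
	struct netmap_if *nifp;
	struct netmap_ring *ring;
	struct netmap_slot *slot;
	char *mem;

	memset(&req, 0, sizeof(req));
	strncpy(req.nr_name, ifname, sizeof(req.nr_name) - 1);
	req.nr_version = NETMAP_API;
	if (ioctl(fd, NIOCREGIF, &req) != 0)
		return -1;

	/* map the region shared with the kernel and locate our netmap_if */
	mem = mmap(0, req.nr_memsize, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (mem == MAP_FAILED)
		return -1;
	nifp = NETMAP_IF(mem, req.nr_offset);
	ring = NETMAP_TXRING(nifp, 0);

	/* fill the current slot, advance cur and consume one available slot */
	slot = &ring->slot[ring->cur];
	memcpy(NETMAP_BUF(ring, slot->buf_idx), frame, len);
	slot->len = len;
	ring->cur = NETMAP_RING_NEXT(ring, ring->cur);
	ring->avail--;

	return ioctl(fd, NIOCTXSYNC, NULL);	/* sync the tx queue */
}
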
diff --git a/examples/packet_ordering/Makefile b/examples/packet_ordering/Makefile
new file mode 100644
index 00000000..9e080a30
--- /dev/null
+++ b/examples/packet_ordering/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-ivshmem-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = packet_ordering
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/packet_ordering/main.c b/examples/packet_ordering/main.c
new file mode 100644
index 00000000..15bb900c
--- /dev/null
+++ b/examples/packet_ordering/main.c
@@ -0,0 +1,756 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <signal.h>
+#include <getopt.h>
+
+#include <rte_eal.h>
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_lcore.h>
+#include <rte_malloc.h>
+#include <rte_mbuf.h>
+#include <rte_mempool.h>
+#include <rte_ring.h>
+#include <rte_reorder.h>
+
+#define RX_DESC_PER_QUEUE 128
+#define TX_DESC_PER_QUEUE 512
+
+#define MAX_PKTS_BURST 32
+#define REORDER_BUFFER_SIZE 8192
+#define MBUF_PER_POOL 65535
+#define MBUF_POOL_CACHE_SIZE 250
+
+#define RING_SIZE 16384
+
+/* uncomment below line to enable debug logs */
+/* #define DEBUG */
+
+#ifdef DEBUG
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#endif
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_REORDERAPP RTE_LOGTYPE_USER1
+
+unsigned int portmask;
+unsigned int disable_reorder;
+volatile uint8_t quit_signal;
+
+static struct rte_mempool *mbuf_pool;
+
+static struct rte_eth_conf port_conf_default;
+
+struct worker_thread_args {
+ struct rte_ring *ring_in;
+ struct rte_ring *ring_out;
+};
+
+struct send_thread_args {
+ struct rte_ring *ring_in;
+ struct rte_reorder_buffer *buffer;
+};
+
+volatile struct app_stats {
+ struct {
+ uint64_t rx_pkts;
+ uint64_t enqueue_pkts;
+ uint64_t enqueue_failed_pkts;
+ } rx __rte_cache_aligned;
+
+ struct {
+ uint64_t dequeue_pkts;
+ uint64_t enqueue_pkts;
+ uint64_t enqueue_failed_pkts;
+ } wkr __rte_cache_aligned;
+
+ struct {
+ uint64_t dequeue_pkts;
+ /* Too early pkts transmitted directly w/o reordering */
+ uint64_t early_pkts_txtd_woro;
+ /* Too early pkts failed from direct transmit */
+ uint64_t early_pkts_tx_failed_woro;
+ uint64_t ro_tx_pkts;
+ uint64_t ro_tx_failed_pkts;
+ } tx __rte_cache_aligned;
+} app_stats;
+
+/**
+ * Get the last enabled lcore ID
+ *
+ * @return
+ * The last enabled lcore ID.
+ */
+static unsigned int
+get_last_lcore_id(void)
+{
+ int i;
+
+ for (i = RTE_MAX_LCORE - 1; i >= 0; i--)
+ if (rte_lcore_is_enabled(i))
+ return i;
+ return 0;
+}
+
+/**
+ * Get the previous enabled lcore ID
+ * @param id
+ * The current lcore ID
+ * @return
+ * The previous enabled lcore ID or the current lcore
+ * ID if it is the first available core.
+ */
+static unsigned int
+get_previous_lcore_id(unsigned int id)
+{
+ int i;
+
+ for (i = id - 1; i >= 0; i--)
+ if (rte_lcore_is_enabled(i))
+ return i;
+ return id;
+}
+
+static inline void
+pktmbuf_free_bulk(struct rte_mbuf *mbuf_table[], unsigned n)
+{
+ unsigned int i;
+
+ for (i = 0; i < n; i++)
+ rte_pktmbuf_free(mbuf_table[i]);
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+ prgname);
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ unsigned long pm;
+ char *end = NULL;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt;
+ int option_index;
+ char **argvopt;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {"disable-reorder", 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:",
+ lgopts, &option_index)) != EOF) {
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ portmask = parse_portmask(optarg);
+ if (portmask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ /* long options */
+ case 0:
+ if (!strcmp(lgopts[option_index].name, "disable-reorder")) {
+ printf("reorder disabled\n");
+ disable_reorder = 1;
+ }
+ break;
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+ if (optind <= 1) {
+ print_usage(prgname);
+ return -1;
+ }
+
+ argv[optind-1] = prgname;
+ optind = 0; /* reset getopt lib */
+ return 0;
+}
+
+/*
+ * Tx buffer error callback
+ */
+static void
+flush_tx_error_callback(struct rte_mbuf **unsent, uint16_t count,
+ void *userdata __rte_unused) {
+
+ /* free the mbufs which failed from transmit */
+ app_stats.tx.ro_tx_failed_pkts += count;
+ LOG_DEBUG(REORDERAPP, "%s:Packet loss with tx_burst\n", __func__);
+ pktmbuf_free_bulk(unsent, count);
+
+}
+
+static inline int
+free_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[]) {
+ const uint8_t nb_ports = rte_eth_dev_count();
+ unsigned port_id;
+
+	/* free the buffers of all enabled ports */
+ for (port_id = 0; port_id < nb_ports; port_id++) {
+ /* skip ports that are not enabled */
+ if ((portmask & (1 << port_id)) == 0)
+ continue;
+
+ rte_free(tx_buffer[port_id]);
+ }
+ return 0;
+}
+
+static inline int
+configure_tx_buffers(struct rte_eth_dev_tx_buffer *tx_buffer[])
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ unsigned port_id;
+ int ret;
+
+ /* initialize buffers for all ports */
+ for (port_id = 0; port_id < nb_ports; port_id++) {
+ /* skip ports that are not enabled */
+ if ((portmask & (1 << port_id)) == 0)
+ continue;
+
+ /* Initialize TX buffers */
+ tx_buffer[port_id] = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(MAX_PKTS_BURST), 0,
+ rte_eth_dev_socket_id(port_id));
+ if (tx_buffer[port_id] == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot allocate buffer for tx on port %u\n",
+ (unsigned) port_id);
+
+ rte_eth_tx_buffer_init(tx_buffer[port_id], MAX_PKTS_BURST);
+
+ ret = rte_eth_tx_buffer_set_err_callback(tx_buffer[port_id],
+ flush_tx_error_callback, NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot set error callback for "
+ "tx buffer on port %u\n", (unsigned) port_id);
+ }
+ return 0;
+}
+
+static inline int
+configure_eth_port(uint8_t port_id)
+{
+ struct ether_addr addr;
+ const uint16_t rxRings = 1, txRings = 1;
+ const uint8_t nb_ports = rte_eth_dev_count();
+ int ret;
+ uint16_t q;
+
+ if (port_id > nb_ports)
+ return -1;
+
+ ret = rte_eth_dev_configure(port_id, rxRings, txRings, &port_conf_default);
+ if (ret != 0)
+ return ret;
+
+ for (q = 0; q < rxRings; q++) {
+ ret = rte_eth_rx_queue_setup(port_id, q, RX_DESC_PER_QUEUE,
+ rte_eth_dev_socket_id(port_id), NULL,
+ mbuf_pool);
+ if (ret < 0)
+ return ret;
+ }
+
+ for (q = 0; q < txRings; q++) {
+ ret = rte_eth_tx_queue_setup(port_id, q, TX_DESC_PER_QUEUE,
+ rte_eth_dev_socket_id(port_id), NULL);
+ if (ret < 0)
+ return ret;
+ }
+
+ ret = rte_eth_dev_start(port_id);
+ if (ret < 0)
+ return ret;
+
+ rte_eth_macaddr_get(port_id, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port_id,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port_id);
+
+ return 0;
+}
+
+static void
+print_stats(void)
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ unsigned i;
+ struct rte_eth_stats eth_stats;
+
+ printf("\nRX thread stats:\n");
+ printf(" - Pkts rxd: %"PRIu64"\n",
+ app_stats.rx.rx_pkts);
+ printf(" - Pkts enqd to workers ring: %"PRIu64"\n",
+ app_stats.rx.enqueue_pkts);
+
+ printf("\nWorker thread stats:\n");
+ printf(" - Pkts deqd from workers ring: %"PRIu64"\n",
+ app_stats.wkr.dequeue_pkts);
+ printf(" - Pkts enqd to tx ring: %"PRIu64"\n",
+ app_stats.wkr.enqueue_pkts);
+ printf(" - Pkts enq to tx failed: %"PRIu64"\n",
+ app_stats.wkr.enqueue_failed_pkts);
+
+ printf("\nTX stats:\n");
+ printf(" - Pkts deqd from tx ring: %"PRIu64"\n",
+ app_stats.tx.dequeue_pkts);
+ printf(" - Ro Pkts transmitted: %"PRIu64"\n",
+ app_stats.tx.ro_tx_pkts);
+ printf(" - Ro Pkts tx failed: %"PRIu64"\n",
+ app_stats.tx.ro_tx_failed_pkts);
+ printf(" - Pkts transmitted w/o reorder: %"PRIu64"\n",
+ app_stats.tx.early_pkts_txtd_woro);
+ printf(" - Pkts tx failed w/o reorder: %"PRIu64"\n",
+ app_stats.tx.early_pkts_tx_failed_woro);
+
+ for (i = 0; i < nb_ports; i++) {
+ rte_eth_stats_get(i, &eth_stats);
+ printf("\nPort %u stats:\n", i);
+ printf(" - Pkts in: %"PRIu64"\n", eth_stats.ipackets);
+ printf(" - Pkts out: %"PRIu64"\n", eth_stats.opackets);
+ printf(" - In Errs: %"PRIu64"\n", eth_stats.ierrors);
+ printf(" - Out Errs: %"PRIu64"\n", eth_stats.oerrors);
+ printf(" - Mbuf Errs: %"PRIu64"\n", eth_stats.rx_nombuf);
+ }
+}
+
+static void
+int_handler(int sig_num)
+{
+ printf("Exiting on signal %d\n", sig_num);
+ quit_signal = 1;
+}
+
+/**
+ * This thread receives mbufs from the port and assigns each of them an
+ * internal sequence number, stored in the mbuf, to keep track of their
+ * order of arrival.
+ * The mbufs are then passed to the worker threads via the rx_to_workers
+ * ring.
+ */
+static int
+rx_thread(struct rte_ring *ring_out)
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ uint32_t seqn = 0;
+ uint16_t i, ret = 0;
+ uint16_t nb_rx_pkts;
+ uint8_t port_id;
+ struct rte_mbuf *pkts[MAX_PKTS_BURST];
+
+ RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
+ rte_lcore_id());
+
+ while (!quit_signal) {
+
+ for (port_id = 0; port_id < nb_ports; port_id++) {
+ if ((portmask & (1 << port_id)) != 0) {
+
+ /* receive packets */
+ nb_rx_pkts = rte_eth_rx_burst(port_id, 0,
+ pkts, MAX_PKTS_BURST);
+ if (nb_rx_pkts == 0) {
+ LOG_DEBUG(REORDERAPP,
+ "%s():Received zero packets\n", __func__);
+ continue;
+ }
+ app_stats.rx.rx_pkts += nb_rx_pkts;
+
+ /* mark sequence number */
+ for (i = 0; i < nb_rx_pkts; )
+ pkts[i++]->seqn = seqn++;
+
+ /* enqueue to rx_to_workers ring */
+ ret = rte_ring_enqueue_burst(ring_out, (void *) pkts,
+ nb_rx_pkts);
+ app_stats.rx.enqueue_pkts += ret;
+ if (unlikely(ret < nb_rx_pkts)) {
+ app_stats.rx.enqueue_failed_pkts +=
+ (nb_rx_pkts-ret);
+ pktmbuf_free_bulk(&pkts[ret], nb_rx_pkts - ret);
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+/**
+ * This thread takes bursts of packets from the rx_to_workers ring and
+ * changes the input port value to the output port value, then feeds
+ * them to the workers_to_tx ring.
+ */
+static int
+worker_thread(void *args_ptr)
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ uint16_t i, ret = 0;
+ uint16_t burst_size = 0;
+ struct worker_thread_args *args;
+ struct rte_mbuf *burst_buffer[MAX_PKTS_BURST] = { NULL };
+ struct rte_ring *ring_in, *ring_out;
+ const unsigned xor_val = (nb_ports > 1);
+
+ args = (struct worker_thread_args *) args_ptr;
+ ring_in = args->ring_in;
+ ring_out = args->ring_out;
+
+ RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
+ rte_lcore_id());
+
+ while (!quit_signal) {
+
+ /* dequeue the mbufs from rx_to_workers ring */
+ burst_size = rte_ring_dequeue_burst(ring_in,
+ (void *)burst_buffer, MAX_PKTS_BURST);
+ if (unlikely(burst_size == 0))
+ continue;
+
+ __sync_fetch_and_add(&app_stats.wkr.dequeue_pkts, burst_size);
+
+ /* just do some operation on mbuf */
+ for (i = 0; i < burst_size;)
+ burst_buffer[i++]->port ^= xor_val;
+
+ /* enqueue the modified mbufs to workers_to_tx ring */
+ ret = rte_ring_enqueue_burst(ring_out, (void *)burst_buffer, burst_size);
+ __sync_fetch_and_add(&app_stats.wkr.enqueue_pkts, ret);
+ if (unlikely(ret < burst_size)) {
+ /* Return the mbufs to their respective pool, dropping packets */
+ __sync_fetch_and_add(&app_stats.wkr.enqueue_failed_pkts,
+ (int)burst_size - ret);
+ pktmbuf_free_bulk(&burst_buffer[ret], burst_size - ret);
+ }
+ }
+ return 0;
+}
+
+/**
+ * Dequeue mbufs from the workers_to_tx ring and reorder them before
+ * transmitting.
+ */
+static int
+send_thread(struct send_thread_args *args)
+{
+ int ret;
+ unsigned int i, dret;
+ uint16_t nb_dq_mbufs;
+ uint8_t outp;
+ unsigned sent;
+ struct rte_mbuf *mbufs[MAX_PKTS_BURST];
+ struct rte_mbuf *rombufs[MAX_PKTS_BURST] = {NULL};
+ static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+ RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__, rte_lcore_id());
+
+ configure_tx_buffers(tx_buffer);
+
+ while (!quit_signal) {
+
+		/* dequeue the mbufs from the workers_to_tx ring */
+ nb_dq_mbufs = rte_ring_dequeue_burst(args->ring_in,
+ (void *)mbufs, MAX_PKTS_BURST);
+
+ if (unlikely(nb_dq_mbufs == 0))
+ continue;
+
+ app_stats.tx.dequeue_pkts += nb_dq_mbufs;
+
+ for (i = 0; i < nb_dq_mbufs; i++) {
+ /* send dequeued mbufs for reordering */
+ ret = rte_reorder_insert(args->buffer, mbufs[i]);
+
+ if (ret == -1 && rte_errno == ERANGE) {
+ /* Too early pkts should be transmitted out directly */
+ LOG_DEBUG(REORDERAPP, "%s():Cannot reorder early packet "
+ "direct enqueuing to TX\n", __func__);
+ outp = mbufs[i]->port;
+ if ((portmask & (1 << outp)) == 0) {
+ rte_pktmbuf_free(mbufs[i]);
+ continue;
+ }
+ if (rte_eth_tx_burst(outp, 0, (void *)mbufs[i], 1) != 1) {
+ rte_pktmbuf_free(mbufs[i]);
+ app_stats.tx.early_pkts_tx_failed_woro++;
+ } else
+ app_stats.tx.early_pkts_txtd_woro++;
+ } else if (ret == -1 && rte_errno == ENOSPC) {
+ /**
+ * Early pkts just outside of window should be dropped
+ */
+ rte_pktmbuf_free(mbufs[i]);
+ }
+ }
+
+ /*
+ * drain MAX_PKTS_BURST of reordered
+ * mbufs for transmit
+ */
+ dret = rte_reorder_drain(args->buffer, rombufs, MAX_PKTS_BURST);
+ for (i = 0; i < dret; i++) {
+
+ struct rte_eth_dev_tx_buffer *outbuf;
+ uint8_t outp1;
+
+ outp1 = rombufs[i]->port;
+ /* skip ports that are not enabled */
+ if ((portmask & (1 << outp1)) == 0) {
+ rte_pktmbuf_free(rombufs[i]);
+ continue;
+ }
+
+ outbuf = tx_buffer[outp1];
+ sent = rte_eth_tx_buffer(outp1, 0, outbuf, rombufs[i]);
+ if (sent)
+ app_stats.tx.ro_tx_pkts += sent;
+ }
+ }
+
+ free_tx_buffers(tx_buffer);
+
+ return 0;
+}
+
+/**
+ * Dequeue mbufs from the workers_to_tx ring and transmit them
+ */
+static int
+tx_thread(struct rte_ring *ring_in)
+{
+ uint32_t i, dqnum;
+ uint8_t outp;
+ unsigned sent;
+ struct rte_mbuf *mbufs[MAX_PKTS_BURST];
+ struct rte_eth_dev_tx_buffer *outbuf;
+ static struct rte_eth_dev_tx_buffer *tx_buffer[RTE_MAX_ETHPORTS];
+
+ RTE_LOG(INFO, REORDERAPP, "%s() started on lcore %u\n", __func__,
+ rte_lcore_id());
+
+ configure_tx_buffers(tx_buffer);
+
+ while (!quit_signal) {
+
+		/* dequeue the mbufs from the workers_to_tx ring */
+ dqnum = rte_ring_dequeue_burst(ring_in,
+ (void *)mbufs, MAX_PKTS_BURST);
+
+ if (unlikely(dqnum == 0))
+ continue;
+
+ app_stats.tx.dequeue_pkts += dqnum;
+
+ for (i = 0; i < dqnum; i++) {
+ outp = mbufs[i]->port;
+ /* skip ports that are not enabled */
+ if ((portmask & (1 << outp)) == 0) {
+ rte_pktmbuf_free(mbufs[i]);
+ continue;
+ }
+
+ outbuf = tx_buffer[outp];
+ sent = rte_eth_tx_buffer(outp, 0, outbuf, mbufs[i]);
+ if (sent)
+ app_stats.tx.ro_tx_pkts += sent;
+ }
+ }
+
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned nb_ports;
+ unsigned int lcore_id, last_lcore_id, master_lcore_id;
+ uint8_t port_id;
+ uint8_t nb_ports_available;
+ struct worker_thread_args worker_args = {NULL, NULL};
+ struct send_thread_args send_args = {NULL, NULL};
+ struct rte_ring *rx_to_workers;
+ struct rte_ring *workers_to_tx;
+
+ /* catch ctrl-c so we can print on exit */
+ signal(SIGINT, int_handler);
+
+ /* Initialize EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ argc -= ret;
+ argv += ret;
+
+ /* Parse the application specific arguments */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ return -1;
+
+	/* Check if we have enough cores */
+ if (rte_lcore_count() < 3)
+ rte_exit(EXIT_FAILURE, "Error, This application needs at "
+ "least 3 logical cores to run:\n"
+ "1 lcore for packet RX\n"
+ "1 lcore for packet TX\n"
+ "and at least 1 lcore for worker threads\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports == 0)
+ rte_exit(EXIT_FAILURE, "Error: no ethernet ports detected\n");
+ if (nb_ports != 1 && (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even, except "
+ "when using a single port\n");
+
+ mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL,
+ MBUF_POOL_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+
+ nb_ports_available = nb_ports;
+
+ /* initialize all ports */
+ for (port_id = 0; port_id < nb_ports; port_id++) {
+ /* skip ports that are not enabled */
+ if ((portmask & (1 << port_id)) == 0) {
+ printf("\nSkipping disabled port %d\n", port_id);
+ nb_ports_available--;
+ continue;
+ }
+ /* init port */
+ printf("Initializing port %u... done\n", (unsigned) port_id);
+
+ if (configure_eth_port(port_id) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize port %"PRIu8"\n",
+ port_id);
+ }
+
+ if (!nb_ports_available) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled. Please set portmask.\n");
+ }
+
+ /* Create rings for inter core communication */
+ rx_to_workers = rte_ring_create("rx_to_workers", RING_SIZE, rte_socket_id(),
+ RING_F_SP_ENQ);
+ if (rx_to_workers == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+
+ workers_to_tx = rte_ring_create("workers_to_tx", RING_SIZE, rte_socket_id(),
+ RING_F_SC_DEQ);
+ if (workers_to_tx == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+
+ if (!disable_reorder) {
+ send_args.buffer = rte_reorder_create("PKT_RO", rte_socket_id(),
+ REORDER_BUFFER_SIZE);
+ if (send_args.buffer == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+ }
+
+ last_lcore_id = get_last_lcore_id();
+ master_lcore_id = rte_get_master_lcore();
+
+ worker_args.ring_in = rx_to_workers;
+ worker_args.ring_out = workers_to_tx;
+
+ /* Start worker_thread() on all the available slave cores but the last 1 */
+ for (lcore_id = 0; lcore_id <= get_previous_lcore_id(last_lcore_id); lcore_id++)
+ if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id)
+ rte_eal_remote_launch(worker_thread, (void *)&worker_args,
+ lcore_id);
+
+ if (disable_reorder) {
+ /* Start tx_thread() on the last slave core */
+ rte_eal_remote_launch((lcore_function_t *)tx_thread, workers_to_tx,
+ last_lcore_id);
+ } else {
+ send_args.ring_in = workers_to_tx;
+ /* Start send_thread() on the last slave core */
+ rte_eal_remote_launch((lcore_function_t *)send_thread,
+ (void *)&send_args, last_lcore_id);
+ }
+
+ /* Start rx_thread() on the master core */
+ rx_thread(rx_to_workers);
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ print_stats();
+ return 0;
+}
diff --git a/examples/performance-thread/Makefile b/examples/performance-thread/Makefile
new file mode 100644
index 00000000..d19f8489
--- /dev/null
+++ b/examples/performance-thread/Makefile
@@ -0,0 +1,49 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_ARCH),"x86_64")
+$(error This application is only supported for x86_64 targets)
+endif
+
+DIRS-y += l3fwd-thread
+DIRS-y += pthread_shim
+
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/performance-thread/common/arch/x86/ctx.c b/examples/performance-thread/common/arch/x86/ctx.c
new file mode 100644
index 00000000..1e8e2717
--- /dev/null
+++ b/examples/performance-thread/common/arch/x86/ctx.c
@@ -0,0 +1,93 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+
+#if defined(__x86_64__)
+__asm__ (
+".text\n"
+".p2align 4,,15\n"
+".globl ctx_switch\n"
+".globl _ctx_switch\n"
+"ctx_switch:\n"
+"_ctx_switch:\n"
+" movq %rsp, 0(%rsi) # save stack_pointer\n"
+" movq %rbp, 8(%rsi) # save frame_pointer\n"
+" movq (%rsp), %rax # save insn_pointer\n"
+" movq %rax, 16(%rsi)\n"
+" movq %rbx, 24(%rsi)\n # save rbx,r12-r15\n"
+" movq 24(%rdi), %rbx\n"
+" movq %r15, 56(%rsi)\n"
+" movq %r14, 48(%rsi)\n"
+" movq 48(%rdi), %r14\n"
+" movq 56(%rdi), %r15\n"
+" movq %r13, 40(%rsi)\n"
+" movq %r12, 32(%rsi)\n"
+" movq 32(%rdi), %r12\n"
+" movq 40(%rdi), %r13\n"
+" movq 0(%rdi), %rsp # restore stack_pointer\n"
+" movq 16(%rdi), %rax # restore insn_pointer\n"
+" movq 8(%rdi), %rbp # restore frame_pointer\n"
+" movq %rax, (%rsp)\n"
+" ret\n"
+ );
+#else
+#pragma GCC error "__x86_64__ is not defined"
+#endif
diff --git a/examples/performance-thread/common/arch/x86/ctx.h b/examples/performance-thread/common/arch/x86/ctx.h
new file mode 100644
index 00000000..03860508
--- /dev/null
+++ b/examples/performance-thread/common/arch/x86/ctx.h
@@ -0,0 +1,57 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef CTX_H
+#define CTX_H
+
+/*
+ * CPU context registers
+ */
+struct ctx {
+ void *rsp; /* 0 */
+ void *rbp; /* 8 */
+ void *rip; /* 16 */
+ void *rbx; /* 24 */
+ void *r12; /* 32 */
+ void *r13; /* 40 */
+ void *r14; /* 48 */
+ void *r15; /* 56 */
+};
+
+
+void
+ctx_switch(struct ctx *new_ctx, struct ctx *curr_ctx);
+
+
+#endif /* CTX_H */
diff --git a/examples/performance-thread/common/common.mk b/examples/performance-thread/common/common.mk
new file mode 100644
index 00000000..d3de5fc6
--- /dev/null
+++ b/examples/performance-thread/common/common.mk
@@ -0,0 +1,42 @@
+#
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# list the C files belonging to the lthread subsystem; these are common to all lthread apps
+SRCS-y += ../common/lthread.c \
+ ../common/lthread_sched.c \
+ ../common/lthread_cond.c \
+ ../common/lthread_tls.c \
+ ../common/lthread_mutex.c \
+ ../common/lthread_diag.c \
+ ../common/arch/x86/ctx.c
+
+INCLUDES += -I$(RTE_SDK)/examples/performance-thread/common/ -I$(RTE_SDK)/examples/performance-thread/common/arch/x86/
diff --git a/examples/performance-thread/common/lthread.c b/examples/performance-thread/common/lthread.c
new file mode 100644
index 00000000..8fbff737
--- /dev/null
+++ b/examples/performance-thread/common/lthread.c
@@ -0,0 +1,529 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from
+ * https://github.com/halayli/lthread, which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#define RTE_MEM 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+
+#include <rte_log.h>
+#include <ctx.h>
+
+#include "lthread_api.h"
+#include "lthread.h"
+#include "lthread_timer.h"
+#include "lthread_tls.h"
+#include "lthread_objcache.h"
+#include "lthread_diag.h"
+
+
+/*
+ * This function gets called after an lthread function has returned.
+ */
+void _lthread_exit_handler(struct lthread *lt)
+{
+
+ lt->state |= BIT(ST_LT_EXITED);
+
+ if (!(lt->state & BIT(ST_LT_DETACH))) {
+		/* the thread is not explicitly detached,
+ * it must be joinable, so we call lthread_exit().
+ */
+ lthread_exit(NULL);
+ }
+
+ /* if we get here the thread is detached so we can reschedule it,
+ * allowing the scheduler to free it
+ */
+ _reschedule();
+}
+
+
+/*
+ * Free resources allocated to an lthread
+ */
+void _lthread_free(struct lthread *lt)
+{
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_FREE, lt, 0);
+
+ /* invoke any user TLS destructor functions */
+ _lthread_tls_destroy(lt);
+
+ /* free memory allocated for TLS defined using RTE_PER_LTHREAD macros */
+ if (sizeof(void *) < (uint64_t)RTE_PER_LTHREAD_SECTION_SIZE)
+ _lthread_objcache_free(lt->tls->root_sched->per_lthread_cache,
+ lt->per_lthread_data);
+
+ /* free pthread style TLS memory */
+ _lthread_objcache_free(lt->tls->root_sched->tls_cache, lt->tls);
+
+ /* free the stack */
+ _lthread_objcache_free(lt->stack_container->root_sched->stack_cache,
+ lt->stack_container);
+
+ /* now free the thread */
+ _lthread_objcache_free(lt->root_sched->lthread_cache, lt);
+
+}
+
+/*
+ * Allocate a stack and maintain a cache of stacks
+ */
+struct lthread_stack *_stack_alloc(void)
+{
+ struct lthread_stack *s;
+
+ s = _lthread_objcache_alloc((THIS_SCHED)->stack_cache);
+ LTHREAD_ASSERT(s != NULL);
+
+ s->root_sched = THIS_SCHED;
+ s->stack_size = LTHREAD_MAX_STACK_SIZE;
+ return s;
+}
+
+/*
+ * Execute a ctx by invoking the start function
+ * On return call an exit handler if the user has provided one
+ */
+static void _lthread_exec(void *arg)
+{
+ struct lthread *lt = (struct lthread *)arg;
+
+	/* invoke the context's function */
+ lt->fun(lt->arg);
+ /* do exit handling */
+ if (lt->exit_handler != NULL)
+ lt->exit_handler(lt);
+}
+
+/*
+ * Initialize an lthread
+ * Set its function, args, and exit handler
+ */
+void
+_lthread_init(struct lthread *lt,
+ lthread_func_t fun, void *arg, lthread_exit_func exit_handler)
+{
+
+ /* set ctx func and args */
+ lt->fun = fun;
+ lt->arg = arg;
+ lt->exit_handler = exit_handler;
+
+ /* set initial state */
+ lt->birth = _sched_now();
+ lt->state = BIT(ST_LT_INIT);
+ lt->join = LT_JOIN_INITIAL;
+}
+
+/*
+ * set the lthread stack
+ */
+void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size)
+{
+ char *stack_top = (char *)stack + stack_size;
+ void **s = (void **)stack_top;
+
+ /* set stack */
+ lt->stack = stack;
+ lt->stack_size = stack_size;
+
+ /* set initial context */
+ s[-3] = NULL;
+ s[-2] = (void *)lt;
+ lt->ctx.rsp = (void *)(stack_top - (4 * sizeof(void *)));
+ lt->ctx.rbp = (void *)(stack_top - (3 * sizeof(void *)));
+ lt->ctx.rip = (void *)_lthread_exec;
+}
+
+/*
+ * Create an lthread on the current scheduler
+ * If there is no current scheduler on this pthread then first create one
+ */
+int
+lthread_create(struct lthread **new_lt, int lcore_id,
+ lthread_func_t fun, void *arg)
+{
+ if ((new_lt == NULL) || (fun == NULL))
+ return POSIX_ERRNO(EINVAL);
+
+ if (lcore_id < 0)
+ lcore_id = rte_lcore_id();
+ else if (lcore_id > LTHREAD_MAX_LCORES)
+ return POSIX_ERRNO(EINVAL);
+
+ struct lthread *lt = NULL;
+
+ if (THIS_SCHED == NULL) {
+ THIS_SCHED = _lthread_sched_create(0);
+ if (THIS_SCHED == NULL) {
+ perror("Failed to create scheduler");
+ return POSIX_ERRNO(EAGAIN);
+ }
+ }
+
+ /* allocate a thread structure */
+ lt = _lthread_objcache_alloc((THIS_SCHED)->lthread_cache);
+ if (lt == NULL)
+ return POSIX_ERRNO(EAGAIN);
+
+ bzero(lt, sizeof(struct lthread));
+ lt->root_sched = THIS_SCHED;
+
+	/* set the function, args and exit handler */
+ _lthread_init(lt, fun, arg, _lthread_exit_handler);
+
+ /* put it in the ready queue */
+ *new_lt = lt;
+
+ if (lcore_id < 0)
+ lcore_id = rte_lcore_id();
+
+ DIAG_CREATE_EVENT(lt, LT_DIAG_LTHREAD_CREATE);
+
+ rte_wmb();
+ _ready_queue_insert(_lthread_sched_get(lcore_id), lt);
+ return 0;
+}
+
+/*
+ * Schedules lthread to sleep for `nsecs`
+ * setting the lthread state to ST_LT_SLEEPING.
+ * lthread state is cleared upon resumption or expiry.
+ */
+static inline void _lthread_sched_sleep(struct lthread *lt, uint64_t nsecs)
+{
+ uint64_t state = lt->state;
+ uint64_t clks = _ns_to_clks(nsecs);
+
+ if (clks) {
+ _timer_start(lt, clks);
+ lt->state = state | BIT(ST_LT_SLEEPING);
+ }
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_SLEEP, clks, 0);
+ _suspend();
+}
+
+
+
+/*
+ * Cancels any running timer.
+ * This can be called multiple times on the same lthread, regardless of whether it was
+ * sleeping or not.
+ */
+int _lthread_desched_sleep(struct lthread *lt)
+{
+ uint64_t state = lt->state;
+
+ if (state & BIT(ST_LT_SLEEPING)) {
+ _timer_stop(lt);
+ state &= (CLEARBIT(ST_LT_SLEEPING) & CLEARBIT(ST_LT_EXPIRED));
+ lt->state = state | BIT(ST_LT_READY);
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * set user data pointer in an lthread
+ */
+void lthread_set_data(void *data)
+{
+ if (sizeof(void *) == RTE_PER_LTHREAD_SECTION_SIZE)
+ THIS_LTHREAD->per_lthread_data = data;
+}
+
+/*
+ * Retrieve user data pointer from an lthread
+ */
+void *lthread_get_data(void)
+{
+ return THIS_LTHREAD->per_lthread_data;
+}
+
+/*
+ * Return the current lthread handle
+ */
+struct lthread *lthread_current(void)
+{
+ struct lthread_sched *sched = THIS_SCHED;
+
+ if (sched)
+ return sched->current_lthread;
+ return NULL;
+}
+
+
+
+/*
+ * Tasklet to cancel a thread
+ */
+static void
+_cancel(void *arg)
+{
+ struct lthread *lt = (struct lthread *) arg;
+
+ lt->state |= BIT(ST_LT_CANCELLED);
+ lthread_detach();
+}
+
+
+/*
+ * Mark the specified lthread as cancelled
+ */
+int lthread_cancel(struct lthread *cancel_lt)
+{
+ struct lthread *lt;
+
+ if ((cancel_lt == NULL) || (cancel_lt == THIS_LTHREAD))
+ return POSIX_ERRNO(EINVAL);
+
+ DIAG_EVENT(cancel_lt, LT_DIAG_LTHREAD_CANCEL, cancel_lt, 0);
+
+ if (cancel_lt->sched != THIS_SCHED) {
+
+ /* spawn task-let to cancel the thread */
+ lthread_create(&lt,
+ cancel_lt->sched->lcore_id,
+ _cancel,
+ cancel_lt);
+ return 0;
+ }
+ cancel_lt->state |= BIT(ST_LT_CANCELLED);
+ return 0;
+}
+
+/*
+ * Suspend the current lthread for specified time
+ */
+void lthread_sleep(uint64_t nsecs)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ _lthread_sched_sleep(lt, nsecs);
+
+}
+
+/*
+ * Suspend the current lthread for specified time
+ */
+void lthread_sleep_clks(uint64_t clks)
+{
+ struct lthread *lt = THIS_LTHREAD;
+ uint64_t state = lt->state;
+
+ if (clks) {
+ _timer_start(lt, clks);
+ lt->state = state | BIT(ST_LT_SLEEPING);
+ }
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_SLEEP, clks, 0);
+ _suspend();
+}
+
+/*
+ * Requeue the current thread to the back of the ready queue
+ */
+void lthread_yield(void)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_YIELD, 0, 0);
+
+ _ready_queue_insert(THIS_SCHED, lt);
+ ctx_switch(&(THIS_SCHED)->ctx, &lt->ctx);
+}
+
+/*
+ * Exit the current lthread
+ * If a thread is joining pass the user pointer to it
+ */
+void lthread_exit(void *ptr)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ /* if thread is detached (this is not valid) just exit */
+ if (lt->state & BIT(ST_LT_DETACH))
+ return;
+
+ /* There is a race between lthread_join() and lthread_exit()
+ * - if exit before join then we suspend and resume on join
+ * - if join before exit then we resume the joining thread
+ */
+ if ((lt->join == LT_JOIN_INITIAL)
+ && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
+ LT_JOIN_EXITING)) {
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 1, 0);
+ _suspend();
+ /* set the exit value */
+ if ((ptr != NULL) && (lt->lt_join->lt_exit_ptr != NULL))
+ *(lt->lt_join->lt_exit_ptr) = ptr;
+
+ /* let the joining thread know we have set the exit value */
+ lt->join = LT_JOIN_EXIT_VAL_SET;
+ } else {
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_EXIT, 0, 0);
+ /* set the exit value */
+ if ((ptr != NULL) && (lt->lt_join->lt_exit_ptr != NULL))
+ *(lt->lt_join->lt_exit_ptr) = ptr;
+ /* let the joining thread know we have set the exit value */
+ lt->join = LT_JOIN_EXIT_VAL_SET;
+ _ready_queue_insert(lt->lt_join->sched,
+ (struct lthread *)lt->lt_join);
+ }
+
+
+	/* wait until the joining thread has collected the exit value */
+ while (lt->join != LT_JOIN_EXIT_VAL_READ)
+ _reschedule();
+
+ /* reset join state */
+ lt->join = LT_JOIN_INITIAL;
+
+ /* detach it so its resources can be released */
+ lt->state |= (BIT(ST_LT_DETACH) | BIT(ST_LT_EXITED));
+}
+
+/*
+ * Join an lthread
+ * Suspend until the joined thread returns
+ */
+int lthread_join(struct lthread *lt, void **ptr)
+{
+ if (lt == NULL)
+ return POSIX_ERRNO(EINVAL);
+
+ struct lthread *current = THIS_LTHREAD;
+ uint64_t lt_state = lt->state;
+
+ /* invalid to join a detached thread, or a thread that is joined */
+ if ((lt_state & BIT(ST_LT_DETACH)) || (lt->join == LT_JOIN_THREAD_SET))
+ return POSIX_ERRNO(EINVAL);
+ /* pointer to the joining thread and a pointer to return a value */
+ lt->lt_join = current;
+ current->lt_exit_ptr = ptr;
+ /* There is a race between lthread_join() and lthread_exit()
+ * - if join before exit we suspend and will resume when exit is called
+ * - if exit before join we resume the exiting thread
+ */
+ if ((lt->join == LT_JOIN_INITIAL)
+ && rte_atomic64_cmpset(&lt->join, LT_JOIN_INITIAL,
+ LT_JOIN_THREAD_SET)) {
+
+ DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 1);
+ _suspend();
+ } else {
+ DIAG_EVENT(current, LT_DIAG_LTHREAD_JOIN, lt, 0);
+ _ready_queue_insert(lt->sched, lt);
+ }
+
+ /* wait for exiting thread to set return value */
+ while (lt->join != LT_JOIN_EXIT_VAL_SET)
+ _reschedule();
+
+ /* collect the return value */
+ if (ptr != NULL)
+ *ptr = *current->lt_exit_ptr;
+
+ /* let the exiting thread proceed to exit */
+ lt->join = LT_JOIN_EXIT_VAL_READ;
+ return 0;
+}
+
+
+/*
+ * Detach current lthread
+ * A detached thread cannot be joined
+ */
+void lthread_detach(void)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_DETACH, 0, 0);
+
+ uint64_t state = lt->state;
+
+ lt->state = state | BIT(ST_LT_DETACH);
+}
+
+/*
+ * Set function name of an lthread
+ * this is a debug aid
+ */
+void lthread_set_funcname(const char *f)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ strncpy(lt->funcname, f, sizeof(lt->funcname));
+ lt->funcname[sizeof(lt->funcname)-1] = 0;
+}
diff --git a/examples/performance-thread/common/lthread.h b/examples/performance-thread/common/lthread.h
new file mode 100644
index 00000000..8c77af82
--- /dev/null
+++ b/examples/performance-thread/common/lthread.h
@@ -0,0 +1,99 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_H_
+#define LTHREAD_H_
+
+#include <rte_per_lcore.h>
+
+#include "lthread_api.h"
+#include "lthread_diag.h"
+
+struct lthread;
+struct lthread_sched;
+
+/* function to be called when a context function returns */
+typedef void (*lthread_exit_func) (struct lthread *);
+
+void _lthread_exit_handler(struct lthread *lt);
+
+void lthread_set_funcname(const char *f);
+
+void _lthread_sched_busy_sleep(struct lthread *lt, uint64_t nsecs);
+
+int _lthread_desched_sleep(struct lthread *lt);
+
+void _lthread_free(struct lthread *lt);
+
+struct lthread_sched *_lthread_sched_get(int lcore_id);
+
+struct lthread_stack *_stack_alloc(void);
+
+struct
+lthread_sched *_lthread_sched_create(size_t stack_size);
+
+void
+_lthread_init(struct lthread *lt,
+ lthread_func_t fun, void *arg, lthread_exit_func exit_handler);
+
+void _lthread_set_stack(struct lthread *lt, void *stack, size_t stack_size);
+
+#endif /* LTHREAD_H_ */
diff --git a/examples/performance-thread/common/lthread_api.h b/examples/performance-thread/common/lthread_api.h
new file mode 100644
index 00000000..ec976103
--- /dev/null
+++ b/examples/performance-thread/common/lthread_api.h
@@ -0,0 +1,832 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software may have been derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/**
+ * @file lthread_api.h
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * This file contains the public API for the L-thread subsystem
+ *
+ * The L-thread subsystem provides a simple cooperative scheduler to
+ * enable arbitrary functions to run as cooperative threads within a
+ * single P-thread.
+ *
+ * The subsystem provides a P-thread like API that is intended to assist in
+ * reuse of legacy code written for POSIX pthreads.
+ *
+ * The L-thread subsystem relies on cooperative multitasking, as such
+ * an L-thread must possess frequent rescheduling points. Often these
+ * rescheduling points are provided transparently when the application
+ * invokes an L-thread API.
+ *
+ * In some applications a program may enter a loop whose exit condition
+ * depends on the action of another thread or on a response from hardware.
+ * In such a case it is necessary to yield the thread
+ * periodically in the loop body, to allow other threads an opportunity to
+ * run. This can be done by inserting a call to lthread_yield() or
+ * lthread_sleep(n) in the body of the loop.
+ *
+ * If the application makes expensive / blocking system calls or does other
+ * work that would take an inordinate amount of time to complete, this will
+ * stall the cooperative scheduler resulting in very poor performance.
+ *
+ * In such cases an L-thread can be migrated temporarily to another scheduler
+ * running in a different P-thread on another core. When the expensive or
+ * blocking operation is completed it can be migrated back to the original
+ * scheduler. In this way other threads can continue to run on the original
+ * scheduler and will be completely unaffected by the blocking behaviour.
+ * To migrate an L-thread to another scheduler the API lthread_set_affinity()
+ * is provided.
+ *
+ * If L-threads that share data are running on the same core it is possible
+ * to design programs where mutual exclusion mechanisms to protect shared data
+ * can be avoided. This is due to the fact that the cooperative threads cannot
+ * preempt each other.
+ *
+ * There are two cases where mutual exclusion mechanisms are necessary.
+ *
+ * a) Where the L-threads sharing data are running on different cores.
+ * b) Where code must yield while updating data shared with another thread.
+ *
+ * The L-thread subsystem provides a set of mutex APIs to help with such
+ * scenarios, however excessive reliance on these will impact performance
+ * and is best avoided if possible.
+ *
+ * L-threads can synchronise using a fast condition variable implementation
+ * that supports signal and broadcast. An L-thread running on any core can
+ * wait on a condition.
+ *
+ * L-threads can have L-thread local storage with an API modelled on either the
+ * P-thread get/set specific API or using PER_LTHREAD macros modelled on the
+ * RTE_PER_LCORE macros. Alternatively a simple user data pointer may be set
+ * and retrieved from a thread.
+ */
+#ifndef LTHREAD_H
+#define LTHREAD_H
+
+#include <stdint.h>
+#include <sys/socket.h>
+#include <fcntl.h>
+#include <netinet/in.h>
+
+#include <rte_cycles.h>
+
+
+struct lthread;
+struct lthread_cond;
+struct lthread_mutex;
+
+struct lthread_condattr;
+struct lthread_mutexattr;
+
+typedef void (*lthread_func_t) (void *);
+
+/*
+ * Define the stack size for an lthread.
+ * This is the size that will be allocated on lthread creation.
+ * The stack is a fixed size and will not grow.
+ */
+#define LTHREAD_MAX_STACK_SIZE (1024*64)
+
+/**
+ * Define the maximum number of TLS keys that can be created
+ *
+ */
+#define LTHREAD_MAX_KEYS 1024
+
+/**
+ * Define the maximum number of attempts to destroy an lthread's
+ * TLS data on thread exit
+ */
+#define LTHREAD_DESTRUCTOR_ITERATIONS 4
+
+
+/**
+ * Define the maximum number of lcores that will support lthreads
+ */
+#define LTHREAD_MAX_LCORES RTE_MAX_LCORE
+
+/**
+ * How many lthread objects to pre-allocate as the system grows
+ * applies to lthreads + stacks, TLS, mutexes, cond vars.
+ *
+ * @see _lthread_alloc()
+ * @see _cond_alloc()
+ * @see _mutex_alloc()
+ *
+ */
+#define LTHREAD_PREALLOC 100
+
+/**
+ * Set the number of schedulers in the system.
+ *
+ * This function may optionally be called before starting schedulers.
+ *
+ * If the number of schedulers is not set, or is set to 0, then each scheduler
+ * will begin scheduling lthreads immediately when it is started.
+ *
+ * If the number of schedulers is set to greater than 0, then each scheduler
+ * will wait until all schedulers have started before beginning to schedule
+ * lthreads.
+ *
+ * If an application wishes to have threads migrate between cores using
+ * lthread_set_affinity(), or join threads running on other cores using
+ * lthread_join(), then it is prudent to set the number of schedulers to ensure
+ * that all schedulers are initialised beforehand.
+ *
+ * @param num
+ * the number of schedulers in the system
+ * @return
+ * the number of schedulers in the system
+ */
+int lthread_num_schedulers_set(int num);
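
For illustration, a minimal start-up sketch under these rules: one scheduler per
lcore, all synchronised via lthread_num_schedulers_set(). The names lcore_entry
and sched_main are hypothetical, and the launch code assumes the standard EAL
rte_eal_remote_launch()/rte_eal_mp_wait_lcore() calls.

    #include <stdio.h>
    #include <rte_eal.h>
    #include <rte_launch.h>
    #include <rte_lcore.h>
    #include <rte_common.h>
    #include "lthread_api.h"

    static void lcore_entry(void *arg __rte_unused)
    {
            printf("hello from an lthread on lcore %u\n", rte_lcore_id());
            /* ... create more lthreads, do work ... */
    }

    static int sched_main(void *arg __rte_unused)
    {
            struct lthread *lt;

            /* creating the first lthread also instantiates the scheduler */
            lthread_create(&lt, -1, lcore_entry, NULL);
            lthread_run();  /* returns only when all lthreads here have exited */
            return 0;
    }

    int main(int argc, char **argv)
    {
            unsigned lcore;

            if (rte_eal_init(argc, argv) < 0)
                    return -1;

            /* make every scheduler wait until all of them have started */
            lthread_num_schedulers_set((int)rte_lcore_count());

            RTE_LCORE_FOREACH_SLAVE(lcore)
                    rte_eal_remote_launch(sched_main, NULL, lcore);

            sched_main(NULL);       /* the master lcore runs a scheduler too */
            rte_eal_mp_wait_lcore();
            return 0;
    }
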
+
+/**
+ * Return the number of schedulers currently running
+ * @return
+ * the number of schedulers in the system
+ */
+int lthread_active_schedulers(void);
+
+/**
+ * Shutdown the specified scheduler
+ *
+ * This function tells the specified scheduler to
+ * exit if/when there is no more work to do.
+ *
+ * Note that although the scheduler will stop,
+ * resources are not freed.
+ *
+ * @param lcore
+ * The lcore of the scheduler to shutdown
+ *
+ * @return
+ * none
+ */
+void lthread_scheduler_shutdown(unsigned lcore);
+
+/**
+ * Shutdown all schedulers
+ *
+ * This function tells all schedulers including the current scheduler to
+ * exit if/when there is no more work to do.
+ *
+ * Note that although the schedulers will stop,
+ * resources are not freed.
+ *
+ * @return
+ * none
+ */
+void lthread_scheduler_shutdown_all(void);
+
+/**
+ * Run the lthread scheduler
+ *
+ * Runs the lthread scheduler.
+ * This function returns only if/when all lthreads have exited.
+ * This function must be the main loop of an EAL thread.
+ *
+ * @return
+ * none
+ */
+
+void lthread_run(void);
+
+/**
+ * Create an lthread
+ *
+ * Creates an lthread and places it in the ready queue on a particular
+ * lcore.
+ *
+ * If no scheduler exists yet on the current lcore then one is created.
+ *
+ * @param new_lt
+ * Pointer to an lthread pointer that will be initialized
+ * @param lcore
+ * the lcore the thread should be started on:
+ * -1 the current lcore
+ * 0 - LTHREAD_MAX_LCORES any other lcore
+ * @param lthread_func
+ * Pointer to the function for the thread to run
+ * @param arg
+ * Pointer to args that will be passed to the thread
+ *
+ * @return
+ * 0 success
+ * EAGAIN no resources available
+ * EINVAL NULL thread or function pointer, or lcore_id out of range
+ */
+int
+lthread_create(struct lthread **new_lt,
+ int lcore, lthread_func_t func, void *arg);
+
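
A small creation sketch may help; worker and spawner are hypothetical names, and
the argument passed in must remain valid for the lifetime of the new lthread.

    static void worker(void *arg)
    {
            int id = *(int *)arg;

            printf("worker %d running on lcore %u\n", id, rte_lcore_id());
            lthread_exit(NULL);
    }

    static void spawner(void *arg __rte_unused)
    {
            static int id = 1;      /* static: must outlive this stack frame */
            struct lthread *lt;

            /* -1 starts the worker on the current lcore */
            if (lthread_create(&lt, -1, worker, &id) != 0)
                    printf("lthread_create failed\n");
    }
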
+/**
+ * Cancel an lthread
+ *
+ * Cancels an lthread and causes it to be terminated
+ * If the lthread is detached it will be freed immediately
+ * otherwise its resources will not be released until it is joined.
+ *
+ * @param lt
+ * Pointer to an lthread that will be cancelled
+ *
+ * @return
+ * 0 success
+ * EINVAL thread was NULL
+ */
+int lthread_cancel(struct lthread *lt);
+
+/**
+ * Join an lthread
+ *
+ * Joins the current thread with the specified lthread, and waits for that
+ * thread to exit.
+ * Passes an optional pointer to collect returned data.
+ *
+ * @param lt
+ * Pointer to the lthread to be joined
+ * @param ptr
+ * Pointer to pointer to collect returned data
+ *
+ * @return
+ * 0 success
+ * EINVAL lthread could not be joined.
+ */
+int lthread_join(struct lthread *lt, void **ptr);
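
A sketch of the exit/join handshake described above; producer and consumer are
hypothetical, and the returned buffer must remain valid until the joiner reads it.

    static void producer(void *arg __rte_unused)
    {
            static const char result[] = "done";    /* outlives the lthread */

            lthread_exit((void *)result);
    }

    static void consumer(void *arg __rte_unused)
    {
            struct lthread *lt;
            void *retval = NULL;

            lthread_create(&lt, -1, producer, NULL);
            if (lthread_join(lt, &retval) == 0)
                    printf("producer returned: %s\n", (const char *)retval);
    }
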
+
+/**
+ * Detach an lthread
+ *
+ * Detaches the current thread
+ * On exit a detached lthread will be freed immediately and will not wait
+ * to be joined. The default state for a thread is not detached.
+ *
+ * @return
+ * none
+ */
+void lthread_detach(void);
+
+/**
+ * Exit an lthread
+ *
+ * Terminate the current thread, optionally return data.
+ * The data may be collected by lthread_join()
+ *
+ * After calling this function the lthread will be suspended until it is
+ * joined. After it is joined then its resources will be freed.
+ *
+ * @param ptr
+ * Pointer to pointer to data to be returned
+ *
+ * @return
+ * none
+ */
+void lthread_exit(void *ptr);
+
+/**
+ * Cause the current lthread to sleep for n nanoseconds
+ *
+ * The current thread will be suspended until the specified time has elapsed
+ * or has been exceeded.
+ *
+ * Execution will switch to the next lthread that is ready to run
+ *
+ * @param nsecs
+ * Number of nanoseconds to sleep
+ *
+ * @return
+ * none
+ */
+void lthread_sleep(uint64_t nsecs);
+
+/**
+ * Cause the current lthread to sleep for n cpu clock ticks
+ *
+ * The current thread will be suspended until the specified time has elapsed
+ * or has been exceeded.
+ *
+ * Execution will switch to the next lthread that is ready to run
+ *
+ * @param clks
+ * Number of clock ticks to sleep
+ *
+ * @return
+ * none
+ */
+void lthread_sleep_clks(uint64_t clks);
+
+/**
+ * Yield the current lthread
+ *
+ * The current thread will yield and execution will switch to the
+ * next lthread that is ready to run
+ *
+ * @return
+ * none
+ */
+void lthread_yield(void);
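
As noted in the file-level comment, loops whose exit condition depends on another
thread must contain a rescheduling point. A minimal sketch, where data_ready is a
hypothetical flag written by another lthread on the same lcore:

    static volatile int data_ready;

    static void poller(void *arg __rte_unused)
    {
            while (!data_ready)
                    lthread_yield();        /* let other lthreads run */
            /* ... consume the data ... */
    }
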
+
+/**
+ * Migrate the current thread to another scheduler
+ *
+ * This function migrates the current thread to another scheduler.
+ * Execution will switch to the next lthread that is ready to run on the
+ * current scheduler. The current thread will be resumed on the new scheduler.
+ *
+ * @param lcore
+ * The lcore to migrate to
+ *
+ * @return
+ * 0 success we are now running on the specified core
+ * EINVAL the destination lcore was not valid
+ */
+int lthread_set_affinity(unsigned lcore);
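
A sketch of the temporary-migration pattern from the file-level comment;
BLOCKING_LCORE is a hypothetical lcore reserved for slow or blocking work.

    #include <unistd.h>

    #define BLOCKING_LCORE 7

    static void do_slow_io(void *arg __rte_unused)
    {
            unsigned home = rte_lcore_id();

            if (lthread_set_affinity(BLOCKING_LCORE) == 0) {
                    /* now on BLOCKING_LCORE; lthreads on the home scheduler
                     * keep running while this call blocks */
                    usleep(10000);
                    lthread_set_affinity(home);     /* migrate back */
            }
    }
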
+
+/**
+ * Return the current lthread
+ *
+ * Returns the current lthread
+ *
+ * @return
+ * pointer to the current lthread
+ */
+struct lthread
+*lthread_current(void);
+
+/**
+ * Associate user data with an lthread
+ *
+ * This function sets a user data pointer in the current lthread
+ * The pointer can be retrieved with lthread_get_data()
+ * It is the user's responsibility to allocate and free any data referenced
+ * by the user pointer.
+ *
+ * @param data
+ * pointer to user data
+ *
+ * @return
+ * none
+ */
+void lthread_set_data(void *data);
+
+/**
+ * Get user data for the current lthread
+ *
+ * This function returns a user data pointer for the current lthread
+ * The pointer must first be set with lthread_set_data()
+ * It is the user's responsibility to allocate and free any data referenced
+ * by the user pointer.
+ *
+ * @return
+ * pointer to user data
+ */
+void
+*lthread_get_data(void);
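
A sketch of the user data pointer in use; struct conn_ctx and conn_handler are
hypothetical, and the application owns the allocation.

    #include <stdlib.h>

    struct conn_ctx {
            int fd;
            uint64_t rx_bytes;
    };

    static void conn_handler(void *arg)
    {
            struct conn_ctx *ctx = malloc(sizeof(*ctx));

            if (ctx == NULL)
                    return;
            ctx->fd = *(int *)arg;
            ctx->rx_bytes = 0;
            lthread_set_data(ctx);          /* visible anywhere in this lthread */

            /* ... later, perhaps deep in a helper function ... */
            struct conn_ctx *c = lthread_get_data();
            c->rx_bytes += 64;

            free(c);                        /* the user frees the memory */
    }
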
+
+struct lthread_key;
+typedef void (*tls_destructor_func) (void *);
+
+/**
+ * Create a key for lthread TLS
+ *
+ * This function is modelled on pthread_key_create
+ * It creates a thread-specific data key visible to all lthreads on the
+ * current scheduler.
+ *
+ * Key values may be used to locate thread-specific data.
+ * The same key value may be used by different threads, the values bound
+ * to the key by lthread_setspecific() are maintained on a per-thread
+ * basis and persist for the life of the calling thread.
+ *
+ * An optional destructor function may be associated with each key value.
+ * At thread exit, if a key value has a non-NULL destructor pointer, and the
+ * thread has a non-NULL value associated with the key, the function pointed
+ * to is called with the current associated value as its sole argument.
+ *
+ * @param key
+ * Pointer to the key to be created
+ * @param destructor
+ * Pointer to destructor function
+ *
+ * @return
+ * 0 success
+ * EINVAL the key ptr was NULL
+ * EAGAIN no resources available
+ */
+int lthread_key_create(unsigned int *key, tls_destructor_func destructor);
+
+/**
+ * Delete key for lthread TLS
+ *
+ * This function is modelled on pthread_key_delete().
+ * It deletes a thread-specific data key previously returned by
+ * lthread_key_create().
+ * The thread-specific data values associated with the key need not be NULL
+ * at the time that lthread_key_delete is called.
+ * It is the responsibility of the application to free any application
+ * storage or perform any cleanup actions for data structures related to the
+ * deleted key. This cleanup can be done either before or after
+ * lthread_key_delete is called.
+ *
+ * @param key
+ * The key to be deleted
+ *
+ * @return
+ * 0 Success
+ * EINVAL the key was invalid
+ */
+int lthread_key_delete(unsigned int key);
+
+/**
+ * Get lthread TLS
+ *
+ * This function is modelled on pthread_getspecific().
+ * It returns the value currently bound to the specified key on behalf of the
+ * calling thread. Calling lthread_getspecific() with a key value not
+ * obtained from lthread_key_create() or after key has been deleted with
+ * lthread_key_delete() will result in undefined behaviour.
+ * lthread_getspecific() may be called from a thread-specific data destructor
+ * function.
+ *
+ * @param key
+ * The key for which data is requested
+ *
+ * @return
+ * Pointer to the thread specific data associated with that key
+ * or NULL if no data has been set.
+ */
+void
+*lthread_getspecific(unsigned int key);
+
+/**
+ * Set lthread TLS
+ *
+ * This function is modelled on pthread_setspecific().
+ * It associates a thread-specific value with a key obtained via a previous
+ * call to lthread_key_create().
+ * Different threads may bind different values to the same key. These values
+ * are typically pointers to dynamically allocated memory that have been
+ * reserved by the calling thread. Calling lthread_setspecific with a key
+ * value not obtained from lthread_key_create or after the key has been
+ * deleted with lthread_key_delete will result in undefined behaviour.
+ *
+ * @param key
+ * The key for which data is to be set
+ * @param value
+ * Pointer to the user data
+ *
+ * @return
+ * 0 success
+ * EINVAL the key was invalid
+ */
+
+int lthread_setspecific(unsigned int key, const void *value);
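
A TLS sketch combining key create, set and get; scratch_key, scratch_free and the
256-byte buffer are hypothetical, and the key is assumed to be created once during
start-up from an lthread.

    static unsigned int scratch_key;

    static void scratch_free(void *p)       /* destructor runs at lthread exit */
    {
            free(p);
    }

    static void tls_user(void *arg __rte_unused)
    {
            char *buf = lthread_getspecific(scratch_key);

            if (buf == NULL) {
                    buf = malloc(256);
                    lthread_setspecific(scratch_key, buf);
            }
            /* ... use buf as per-lthread scratch space ... */
    }

    /* during start-up, from an lthread:
     *     lthread_key_create(&scratch_key, scratch_free);
     */
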
+
+/**
+ * The macros below provide an alternative mechanism to access lthread local
+ * storage.
+ *
+ * The macros can be used to declare, define and access per lthread local
+ * storage in a similar way to the RTE_PER_LCORE macros which control storage
+ * local to an lcore.
+ *
+ * Memory for per lthread variables declared in this way is allocated when the
+ * lthread is created and a pointer to this memory is stored in the lthread.
+ * The per lthread variables are accessed via the pointer + the offset of the
+ * particular variable.
+ *
+ * The total size of per lthread storage, and the variable offsets are found by
+ * defining the variables in a unique global memory section, the start and end
+ * of which is known. This global memory section is used only in the
+ * computation of the addresses of the lthread variables, and is never actually
+ * used to store any data.
+ *
+ * Due to the fact that variables declared this way may be scattered across
+ * many files, the start and end of the section and variable offsets are only
+ * known after linking, thus the computation of section size and variable
+ * addresses is performed at run time.
+ *
+ * These macros are primarily provided to aid porting of code that makes use
+ * of the existing RTE_PER_LCORE macros. In principle it would be more efficient
+ * to gather all lthread local variables into a single structure and
+ * set/retrieve a pointer to that struct using the alternative
+ * lthread_data_set/get APIs.
+ *
+ * These macros are mutually exclusive with the lthread_data_set/get APIs.
+ * If you define storage using these macros then the lthread_data_set/get APIs
+ * will not perform as expected, the lthread_data_set API does nothing, and the
+ * lthread_data_get API returns the start of the global section.
+ *
+ */
+/* start and end of per lthread section */
+extern char __start_per_lt;
+extern char __stop_per_lt;
+
+
+#define RTE_DEFINE_PER_LTHREAD(type, name) \
+__typeof__(type)__attribute((section("per_lt"))) per_lt_##name
+
+/**
+ * Macro to declare an extern per lthread variable "var" of type "type"
+ */
+#define RTE_DECLARE_PER_LTHREAD(type, name) \
+extern __typeof__(type)__attribute((section("per_lt"))) per_lt_##name
+
+/**
+ * Read/write the per-lthread variable value
+ */
+#define RTE_PER_LTHREAD(name) ((typeof(per_lt_##name) *)\
+((char *)lthread_get_data() +\
+((char *) &per_lt_##name - &__start_per_lt)))
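
A sketch of the macros in use (nb_pkts is a hypothetical counter); as stated
above, this mechanism must not be mixed with lthread_set_data()/lthread_get_data().

    RTE_DEFINE_PER_LTHREAD(uint64_t, nb_pkts);      /* definition, in one .c file */

    static void count_one(void)
    {
            /* the macro yields a pointer into this lthread's private copy */
            *RTE_PER_LTHREAD(nb_pkts) += 1;
    }
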
+
+/**
+ * Initialize a mutex
+ *
+ * This function provides a mutual exclusion device, the need for which
+ * can normally be avoided in a cooperative multitasking environment.
+ * It is provided to aid porting of legacy code originally written for
+ * preemptive multitasking environments such as pthreads.
+ *
+ * A mutex may be unlocked (not owned by any thread), or locked (owned by
+ * one thread).
+ *
+ * A mutex can never be owned by more than one thread simultaneously.
+ * A thread attempting to lock a mutex that is already locked by another
+ * thread is suspended until the owning thread unlocks the mutex.
+ *
+ * lthread_mutex_init() initializes the mutex object pointed to by mutex
+ * Optional mutex attributes specified in mutexattr, are reserved for future
+ * use and are currently ignored.
+ *
+ * If a thread calls lthread_mutex_lock() on the mutex, then if the mutex
+ * is currently unlocked, it becomes locked and owned by the calling
+ * thread, and lthread_mutex_lock returns immediately. If the mutex is
+ * already locked by another thread, lthread_mutex_lock suspends the calling
+ * thread until the mutex is unlocked.
+ *
+ * lthread_mutex_trylock behaves identically to lthread_mutex_lock, except
+ * that it does not block the calling thread if the mutex is already locked
+ * by another thread.
+ *
+ * lthread_mutex_unlock() unlocks the specified mutex. The mutex is assumed
+ * to be locked and owned by the calling thread.
+ *
+ * lthread_mutex_destroy() destroys a mutex object, freeing its resources.
+ * The mutex must be unlocked with nothing blocked on it before calling
+ * lthread_mutex_destroy.
+ *
+ * @param name
+ * Optional pointer to string describing the mutex
+ * @param mutex
+ * Pointer to pointer to the mutex to be initialized
+ * @param attr
+ * Pointer to attribute - unused reserved
+ *
+ * @return
+ * 0 success
+ * EINVAL mutex was not a valid pointer
+ * EAGAIN insufficient resources
+ */
+
+int
+lthread_mutex_init(char *name, struct lthread_mutex **mutex,
+ const struct lthread_mutexattr *attr);
+
+/**
+ * Destroy a mutex
+ *
+ * This function destroys the specified mutex freeing its resources.
+ * The mutex must be unlocked before calling lthread_mutex_destroy.
+ *
+ * @see lthread_mutex_init()
+ *
+ * @param mutex
+ * Pointer to the mutex to be destroyed
+ *
+ * @return
+ * 0 success
+ * EINVAL mutex was not an initialized mutex
+ * EBUSY mutex was still in use
+ */
+int lthread_mutex_destroy(struct lthread_mutex *mutex);
+
+/**
+ * Lock a mutex
+ *
+ * This function attempts to lock a mutex.
+ * If a thread calls lthread_mutex_lock() on the mutex, then if the mutex
+ * is currently unlocked, it becomes locked and owned by the calling
+ * thread, and lthread_mutex_lock returns immediately. If the mutex is
+ * already locked by another thread, lthread_mutex_lock suspends the calling
+ * thread until the mutex is unlocked.
+ *
+ * @see lthread_mutex_init()
+ *
+ * @param mutex
+ * Pointer to the mutex to be locked
+ *
+ * @return
+ * 0 success
+ * EINVAL mutex was not an initialized mutex
+ * EDEADLOCK the mutex was already owned by the calling thread
+ */
+
+int lthread_mutex_lock(struct lthread_mutex *mutex);
+
+/**
+ * Try to lock a mutex
+ *
+ * This function attempts to lock a mutex.
+ * lthread_mutex_trylock behaves identically to lthread_mutex_lock, except
+ * that it does not block the calling thread if the mutex is already locked
+ * by another thread.
+ *
+ *
+ * @see lthread_mutex_init()
+ *
+ * @param mutex
+ * Pointer to the mutex to be locked
+ *
+ * @return
+ * 0 success
+ * EINVAL mutex was not an initialized mutex
+ * EBUSY the mutex was already locked by another thread
+ */
+int lthread_mutex_trylock(struct lthread_mutex *mutex);
+
+/**
+ * Unlock a mutex
+ *
+ * This function attempts to unlock the specified mutex. The mutex is assumed
+ * to be locked and owned by the calling thread.
+ *
+ * The oldest of any threads blocked on the mutex is made ready and may
+ * compete with any other running thread to gain the mutex; if it fails it will
+ * be blocked again.
+ *
+ * @param mutex
+ * Pointer to the mutex to be unlocked
+ *
+ * @return
+ * 0 mutex was unlocked
+ * EINVAL mutex was not an initialized mutex
+ * EPERM the mutex was not owned by the calling thread
+ */
+
+int lthread_mutex_unlock(struct lthread_mutex *mutex);
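
A sketch protecting a counter shared by lthreads on different lcores;
counter_mutex and bump_counter are hypothetical, and the mutex is assumed to be
initialised once from an lthread.

    static struct lthread_mutex *counter_mutex;
    static uint64_t shared_counter;

    static void bump_counter(void *arg __rte_unused)
    {
            lthread_mutex_lock(counter_mutex);
            shared_counter++;
            lthread_mutex_unlock(counter_mutex);
    }

    /* start-up, from an lthread:
     *     lthread_mutex_init("counter", &counter_mutex, NULL);
     */
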
+
+/**
+ * Initialize a condition variable
+ *
+ * This function initializes a condition variable.
+ *
+ * Condition variables can be used to communicate changes in the state of data
+ * shared between threads.
+ *
+ * @see lthread_cond_wait()
+ *
+ * @param name
+ * Pointer to optional string describing the condition variable
+ * @param c
+ * Pointer to pointer to the condition variable to be initialized
+ * @param attr
+ * Pointer to optional attribute reserved for future use, currently ignored
+ *
+ * @return
+ * 0 success
+ * EINVAL cond was not a valid pointer
+ * EAGAIN insufficient resources
+ */
+int
+lthread_cond_init(char *name, struct lthread_cond **c,
+ const struct lthread_condattr *attr);
+
+/**
+ * Destroy a condition variable
+ *
+ * This function destroys a condition variable that was created with
+ * lthread_cond_init() and releases its resources.
+ *
+ * @param cond
+ * Pointer to pointer to the condition variable to be destroyed
+ *
+ * @return
+ * 0 Success
+ * EBUSY condition variable was still in use
+ * EINVAL was not an initialised condition variable
+ */
+int lthread_cond_destroy(struct lthread_cond *cond);
+
+/**
+ * Wait on a condition variable
+ *
+ * The function blocks the current thread waiting on the condition variable
+ * specified by cond. The waiting thread unblocks only after another thread
+ * calls lthread_cond_signal, or lthread_cond_broadcast, specifying the
+ * same condition variable.
+ *
+ * @param cond
+ * Pointer to pointer to the condition variable to be waited on
+ *
+ * @param reserved
+ * reserved for future use
+ *
+ * @return
+ * 0 The condition was signalled ( Success )
+ * EINVAL was not an initialised condition variable
+ */
+int lthread_cond_wait(struct lthread_cond *c, uint64_t reserved);
+
+/**
+ * Signal a condition variable
+ *
+ * The function unblocks one thread waiting for the condition variable cond.
+ * If no threads are waiting on cond, the lthread_cond_signal() function
+ * has no effect.
+ *
+ * @param cond
+ * Pointer to pointer to the condition variable to be signalled
+ *
+ * @return
+ * 0 The condition was signalled ( Success )
+ * EINVAL was not an initialised condition variable
+ */
+int lthread_cond_signal(struct lthread_cond *c);
+
+/**
+ * Broadcast a condition variable
+ *
+ * The function unblocks all threads waiting for the condition variable cond.
+ * If no threads are waiting on cond, the lthread_cond_broadcast()
+ * function has no effect.
+ *
+ * @param cond
+ * Pointer to pointer to the condition variable to be signalled
+ *
+ * @return
+ * 0 The condition was signalled ( Success )
+ * EINVAL was not an initialised condition variable
+ */
+int lthread_cond_broadcast(struct lthread_cond *c);
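
A sketch of wait/signal; work_cond, work_pending and both functions are
hypothetical. The waiter re-checks its predicate in a loop, since the wait only
indicates that the condition variable was signalled.

    static struct lthread_cond *work_cond;  /* created once at start-up */
    static volatile int work_pending;

    static void cond_worker(void *arg __rte_unused)
    {
            for (;;) {
                    while (!work_pending)
                            lthread_cond_wait(work_cond, 0);
                    work_pending = 0;
                    /* ... process the work item ... */
            }
    }

    static void cond_submitter(void *arg __rte_unused)
    {
            work_pending = 1;
            lthread_cond_signal(work_cond); /* wake one waiting lthread */
    }

    /* start-up, from an lthread:
     *     lthread_cond_init("work", &work_cond, NULL);
     */
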
+
+#endif /* LTHREAD_H */
diff --git a/examples/performance-thread/common/lthread_cond.c b/examples/performance-thread/common/lthread_cond.c
new file mode 100644
index 00000000..96fcce04
--- /dev/null
+++ b/examples/performance-thread/common/lthread_cond.c
@@ -0,0 +1,239 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software may have been derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <errno.h>
+
+#include <rte_log.h>
+#include <rte_common.h>
+
+#include "lthread_api.h"
+#include "lthread_diag_api.h"
+#include "lthread_diag.h"
+#include "lthread_int.h"
+#include "lthread_sched.h"
+#include "lthread_queue.h"
+#include "lthread_objcache.h"
+#include "lthread_timer.h"
+#include "lthread_mutex.h"
+#include "lthread_cond.h"
+
+/*
+ * Create a condition variable
+ */
+int
+lthread_cond_init(char *name, struct lthread_cond **cond,
+ __rte_unused const struct lthread_condattr *attr)
+{
+ struct lthread_cond *c;
+
+ if (cond == NULL)
+ return POSIX_ERRNO(EINVAL);
+
+ /* allocate a condition variable from cache */
+ c = _lthread_objcache_alloc((THIS_SCHED)->cond_cache);
+
+ if (c == NULL)
+ return POSIX_ERRNO(EAGAIN);
+
+ c->blocked = _lthread_queue_create("blocked");
+ if (c->blocked == NULL) {
+ _lthread_objcache_free((THIS_SCHED)->cond_cache, (void *)c);
+ return POSIX_ERRNO(EAGAIN);
+ }
+
+ if (name == NULL)
+ strncpy(c->name, "no name", sizeof(c->name));
+ else
+ strncpy(c->name, name, sizeof(c->name));
+ c->name[sizeof(c->name)-1] = 0;
+
+ c->root_sched = THIS_SCHED;
+
+ (*cond) = c;
+ DIAG_CREATE_EVENT((*cond), LT_DIAG_COND_CREATE);
+ return 0;
+}
+
+/*
+ * Destroy a condition variable
+ */
+int lthread_cond_destroy(struct lthread_cond *c)
+{
+ if (c == NULL) {
+ DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ /* try to free it */
+ if (_lthread_queue_destroy(c->blocked) < 0) {
+ /* queue in use */
+ DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, POSIX_ERRNO(EBUSY));
+ return POSIX_ERRNO(EBUSY);
+ }
+
+ /* okay free it */
+ _lthread_objcache_free(c->root_sched->cond_cache, c);
+ DIAG_EVENT(c, LT_DIAG_COND_DESTROY, c, 0);
+ return 0;
+}
+
+/*
+ * Wait on a condition variable
+ */
+int lthread_cond_wait(struct lthread_cond *c, __rte_unused uint64_t reserved)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ if (c == NULL) {
+ DIAG_EVENT(c, LT_DIAG_COND_WAIT, c, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+
+ DIAG_EVENT(c, LT_DIAG_COND_WAIT, c, 0);
+
+ /* queue the current thread in the blocked queue
+ * this will be written when we return to the scheduler
+ * to ensure that the current thread context is saved
+ * before any signal could result in it being dequeued and
+ * resumed
+ */
+ lt->pending_wr_queue = c->blocked;
+ _suspend();
+
+ /* the condition happened */
+ return 0;
+}
+
+/*
+ * Signal a condition variable
+ * attempt to resume any blocked thread
+ */
+int lthread_cond_signal(struct lthread_cond *c)
+{
+ struct lthread *lt;
+
+ if (c == NULL) {
+ DIAG_EVENT(c, LT_DIAG_COND_SIGNAL, c, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ lt = _lthread_queue_remove(c->blocked);
+
+ if (lt != NULL) {
+ /* okay wake up this thread */
+ DIAG_EVENT(c, LT_DIAG_COND_SIGNAL, c, lt);
+ _ready_queue_insert((struct lthread_sched *)lt->sched, lt);
+ }
+ return 0;
+}
+
+/*
+ * Broadcast a condition variable
+ */
+int lthread_cond_broadcast(struct lthread_cond *c)
+{
+ struct lthread *lt;
+
+ if (c == NULL) {
+ DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, 0);
+ do {
+ /* drain the queue waking everybody */
+ lt = _lthread_queue_remove(c->blocked);
+
+ if (lt != NULL) {
+ DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, lt);
+ /* wake up */
+ _ready_queue_insert((struct lthread_sched *)lt->sched,
+ lt);
+ }
+ } while (!_lthread_queue_empty(c->blocked));
+ _reschedule();
+ DIAG_EVENT(c, LT_DIAG_COND_BROADCAST, c, 0);
+ return 0;
+}
+
+/*
+ * return the diagnostic ref val stored in a condition var
+ */
+uint64_t
+lthread_cond_diag_ref(struct lthread_cond *c)
+{
+ if (c == NULL)
+ return 0;
+ return c->diag_ref;
+}
diff --git a/examples/performance-thread/common/lthread_cond.h b/examples/performance-thread/common/lthread_cond.h
new file mode 100644
index 00000000..5bd02a7d
--- /dev/null
+++ b/examples/performance-thread/common/lthread_cond.h
@@ -0,0 +1,77 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software may have been derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_COND_H_
+#define LTHREAD_COND_H_
+
+#include "lthread_queue.h"
+
+#define MAX_COND_NAME_SIZE 64
+
+struct lthread_cond {
+ struct lthread_queue *blocked;
+ struct lthread_sched *root_sched;
+ int count;
+ char name[MAX_COND_NAME_SIZE];
+ uint64_t diag_ref; /* optional ref to user diag data */
+} __rte_cache_aligned;
+
+#endif /* LTHREAD_COND_H_ */
diff --git a/examples/performance-thread/common/lthread_diag.c b/examples/performance-thread/common/lthread_diag.c
new file mode 100644
index 00000000..bce1a0c3
--- /dev/null
+++ b/examples/performance-thread/common/lthread_diag.c
@@ -0,0 +1,323 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_log.h>
+#include <rte_common.h>
+
+#include "lthread_diag.h"
+#include "lthread_queue.h"
+#include "lthread_pool.h"
+#include "lthread_objcache.h"
+#include "lthread_sched.h"
+#include "lthread_diag_api.h"
+
+
+/* dummy ref value of default diagnostic callback */
+static uint64_t dummy_ref;
+
+#define DIAG_SCHED_STATS_FORMAT \
+"core %d\n%33s %12s %12s %12s %12s\n"
+
+#define DIAG_CACHE_STATS_FORMAT \
+"%20s %12lu %12lu %12lu %12lu %12lu\n"
+
+#define DIAG_QUEUE_STATS_FORMAT \
+"%20s %12lu %12lu %12lu\n"
+
+
+/*
+ * texts used in diagnostic events,
+ * corresponding diagnostic mask bit positions are given as comment
+ */
+const char *diag_event_text[] = {
+ "LTHREAD_CREATE ", /* 00 */
+ "LTHREAD_EXIT ", /* 01 */
+ "LTHREAD_JOIN ", /* 02 */
+ "LTHREAD_CANCEL ", /* 03 */
+ "LTHREAD_DETACH ", /* 04 */
+ "LTHREAD_FREE ", /* 05 */
+ "LTHREAD_SUSPENDED ", /* 06 */
+ "LTHREAD_YIELD ", /* 07 */
+ "LTHREAD_RESCHEDULED", /* 08 */
+ "LTHREAD_SLEEP ", /* 09 */
+ "LTHREAD_RESUMED ", /* 10 */
+ "LTHREAD_AFFINITY ", /* 11 */
+ "LTHREAD_TMR_START ", /* 12 */
+ "LTHREAD_TMR_DELETE ", /* 13 */
+ "LTHREAD_TMR_EXPIRED", /* 14 */
+ "COND_CREATE ", /* 15 */
+ "COND_DESTROY ", /* 16 */
+ "COND_WAIT ", /* 17 */
+ "COND_SIGNAL ", /* 18 */
+ "COND_BROADCAST ", /* 19 */
+ "MUTEX_CREATE ", /* 20 */
+ "MUTEX_DESTROY ", /* 21 */
+ "MUTEX_LOCK ", /* 22 */
+ "MUTEX_TRYLOCK ", /* 23 */
+ "MUTEX_BLOCKED ", /* 24 */
+ "MUTEX_UNLOCKED ", /* 25 */
+ "SCHED_CREATE ", /* 26 */
+ "SCHED_SHUTDOWN " /* 27 */
+};
+
+
+/*
+ * set diagnostic mask
+ */
+void lthread_diagnostic_set_mask(DIAG_USED uint64_t mask)
+{
+#if LTHREAD_DIAG
+ diag_mask = mask;
+#else
+ RTE_LOG(INFO, LTHREAD,
+ "LTHREAD_DIAG is not set, see lthread_diag_api.h\n");
+#endif
+}
+
+
+/*
+ * Check consistency of the scheduler stats
+ * Only sensible to run after the schedulers are stopped
+ * Count the number of objects lying in caches and queues
+ * and available in the qnode pool.
+ * This should be equal to the total capacity of all
+ * qnode pools.
+ */
+void
+_sched_stats_consistency_check(void);
+void
+_sched_stats_consistency_check(void)
+{
+#if LTHREAD_DIAG
+ int i;
+ struct lthread_sched *sched;
+ uint64_t count = 0;
+ uint64_t capacity = 0;
+
+ for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
+ sched = schedcore[i];
+ if (sched == NULL)
+ continue;
+
+ /* each of these queues consumes a stub node */
+ count += 8;
+ count += DIAG_COUNT(sched->ready, size);
+ count += DIAG_COUNT(sched->pready, size);
+ count += DIAG_COUNT(sched->lthread_cache, available);
+ count += DIAG_COUNT(sched->stack_cache, available);
+ count += DIAG_COUNT(sched->tls_cache, available);
+ count += DIAG_COUNT(sched->per_lthread_cache, available);
+ count += DIAG_COUNT(sched->cond_cache, available);
+ count += DIAG_COUNT(sched->mutex_cache, available);
+
+ /* the node pool does not consume a stub node */
+ if (sched->qnode_pool->fast_alloc != NULL)
+ count++;
+ count += DIAG_COUNT(sched->qnode_pool, available);
+
+ capacity += DIAG_COUNT(sched->qnode_pool, capacity);
+ }
+ if (count != capacity) {
+ RTE_LOG(CRIT, LTHREAD,
+ "Scheduler caches are inconsistent\n");
+ } else {
+ RTE_LOG(INFO, LTHREAD,
+ "Scheduler caches are ok\n");
+ }
+#endif
+}
+
+
+#if LTHREAD_DIAG
+/*
+ * Display node pool stats
+ */
+static inline void
+_qnode_pool_display(DIAG_USED struct qnode_pool *p)
+{
+
+ printf(DIAG_CACHE_STATS_FORMAT,
+ p->name,
+ DIAG_COUNT(p, rd),
+ DIAG_COUNT(p, wr),
+ DIAG_COUNT(p, available),
+ DIAG_COUNT(p, prealloc),
+ DIAG_COUNT(p, capacity));
+ fflush(stdout);
+}
+#endif
+
+
+#if LTHREAD_DIAG
+/*
+ * Display queue stats
+ */
+static inline void
+_lthread_queue_display(DIAG_USED struct lthread_queue *q)
+{
+#if DISPLAY_OBJCACHE_QUEUES
+ printf(DIAG_QUEUE_STATS_FORMAT,
+ q->name,
+ DIAG_COUNT(q, rd),
+ DIAG_COUNT(q, wr),
+ DIAG_COUNT(q, size));
+ fflush(stdout);
+#else
+ printf("%s: queue stats disabled\n",
+ q->name);
+
+#endif
+}
+#endif
+
+#if LTHREAD_DIAG
+/*
+ * Display objcache stats
+ */
+static inline void
+_objcache_display(DIAG_USED struct lthread_objcache *c)
+{
+
+ printf(DIAG_CACHE_STATS_FORMAT,
+ c->name,
+ DIAG_COUNT(c, rd),
+ DIAG_COUNT(c, wr),
+ DIAG_COUNT(c, available),
+ DIAG_COUNT(c, prealloc),
+ DIAG_COUNT(c, capacity));
+ _lthread_queue_display(c->q);
+ fflush(stdout);
+}
+#endif
+
+/*
+ * Display sched stats
+ */
+void
+lthread_sched_stats_display(void)
+{
+#if LTHREAD_DIAG
+ int i;
+ struct lthread_sched *sched;
+
+ for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
+ sched = schedcore[i];
+ if (sched != NULL) {
+ printf(DIAG_SCHED_STATS_FORMAT,
+ sched->lcore_id,
+ "rd",
+ "wr",
+ "present",
+ "nb preallocs",
+ "capacity");
+ _lthread_queue_display(sched->ready);
+ _lthread_queue_display(sched->pready);
+ _qnode_pool_display(sched->qnode_pool);
+ _objcache_display(sched->lthread_cache);
+ _objcache_display(sched->stack_cache);
+ _objcache_display(sched->tls_cache);
+ _objcache_display(sched->per_lthread_cache);
+ _objcache_display(sched->cond_cache);
+ _objcache_display(sched->mutex_cache);
+ fflush(stdout);
+ }
+ }
+ _sched_stats_consistency_check();
+#else
+ RTE_LOG(INFO, LTHREAD,
+ "lthread diagnostics disabled\n"
+ "hint - set LTHREAD_DIAG in lthread_diag_api.h\n");
+#endif
+}
+
+/*
+ * Default diagnostic callback
+ */
+static uint64_t
+_lthread_diag_default_cb(uint64_t time, struct lthread *lt, int diag_event,
+ uint64_t diag_ref, const char *text, uint64_t p1, uint64_t p2)
+{
+ uint64_t _p2;
+ int lcore = (int) rte_lcore_id();
+
+ switch (diag_event) {
+ case LT_DIAG_LTHREAD_CREATE:
+ case LT_DIAG_MUTEX_CREATE:
+ case LT_DIAG_COND_CREATE:
+ _p2 = dummy_ref;
+ break;
+ default:
+ _p2 = p2;
+ break;
+ }
+
+ printf("%"PRIu64" %d %8.8lx %8.8lx %s %8.8lx %8.8lx\n",
+ time,
+ lcore,
+ (uint64_t) lt,
+ diag_ref,
+ text,
+ p1,
+ _p2);
+
+ return dummy_ref++;
+}
+
+/*
+ * plug in default diag callback with mask off
+ */
+void _lthread_diag_ctor(void)__attribute__((constructor));
+void _lthread_diag_ctor(void)
+{
+ diag_cb = _lthread_diag_default_cb;
+ diag_mask = 0;
+}
+
+
+/*
+ * enable diagnostics
+ */
+void lthread_diagnostic_enable(DIAG_USED diag_callback cb,
+ DIAG_USED uint64_t mask)
+{
+#if LTHREAD_DIAG
+ if (cb == NULL)
+ diag_cb = _lthread_diag_default_cb;
+ else
+ diag_cb = cb;
+ diag_mask = mask;
+#else
+ RTE_LOG(INFO, LTHREAD,
+ "LTHREAD_DIAG is not set, see lthread_diag_api.h\n");
+#endif
+}
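
For illustration, a sketch of registering a custom callback; my_diag_cb is
hypothetical and assumes diag_callback has the same signature as the default
callback above. It only takes effect when LTHREAD_DIAG is set to 1 in
lthread_diag_api.h.

    #include <inttypes.h>

    static uint64_t
    my_diag_cb(uint64_t time, struct lthread *lt, int diag_event,
               uint64_t diag_ref, const char *text, uint64_t p1, uint64_t p2)
    {
            (void)diag_event;
            printf("%"PRIu64" lt=%p %s ref=%"PRIx64" p1=%"PRIx64" p2=%"PRIx64"\n",
                   time, (void *)lt, text, diag_ref, p1, p2);
            /* the value returned on a create event is stored in the object */
            return diag_ref;
    }

    /* enable all events:
     *     lthread_diagnostic_enable(my_diag_cb, UINT64_MAX);
     */
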
diff --git a/examples/performance-thread/common/lthread_diag.h b/examples/performance-thread/common/lthread_diag.h
new file mode 100644
index 00000000..2877d311
--- /dev/null
+++ b/examples/performance-thread/common/lthread_diag.h
@@ -0,0 +1,132 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_DIAG_H_
+#define LTHREAD_DIAG_H_
+
+#include <stdint.h>
+#include <inttypes.h>
+
+#include <rte_log.h>
+#include <rte_common.h>
+
+#include "lthread_api.h"
+#include "lthread_diag_api.h"
+
+extern diag_callback diag_cb;
+
+extern const char *diag_event_text[];
+extern uint64_t diag_mask;
+
+/* max size of name strings */
+#define LT_MAX_NAME_SIZE 64
+
+#if LTHREAD_DIAG
+#define DISPLAY_OBJCACHE_QUEUES 1
+
+/*
+ * Generate a diagnostic trace or event in the case where an object is created.
+ *
+ * The value returned by the callback is stored in the object.
+ *
+ * @ param obj
+ * pointer to the object that was created
+ * @ param ev
+ * the event code
+ *
+ */
+#define DIAG_CREATE_EVENT(obj, ev) do { \
+ struct lthread *ct = RTE_PER_LCORE(this_sched)->current_lthread;\
+ if ((BIT(ev) & diag_mask) && (ev < LT_DIAG_EVENT_MAX)) { \
+ (obj)->diag_ref = (diag_cb)(rte_rdtsc(), \
+ ct, \
+ (ev), \
+ 0, \
+ diag_event_text[(ev)], \
+ (uint64_t)obj, \
+ 0); \
+ } \
+} while (0)
+
+/*
+ * Generate a diagnostic trace event.
+ *
+ * @ param obj
+ * pointer to the lthread, cond or mutex object
+ * @ param ev
+ * the event code
+ * @ param p1
+ * object specific value ( see lthread_diag_api.h )
+ * @ param p2
+ * object specific value ( see lthread_diag_api.h )
+ */
+#define DIAG_EVENT(obj, ev, p1, p2) do { \
+ struct lthread *ct = RTE_PER_LCORE(this_sched)->current_lthread;\
+ if ((BIT(ev) & diag_mask) && (ev < LT_DIAG_EVENT_MAX)) { \
+ (diag_cb)(rte_rdtsc(), \
+ ct, \
+ ev, \
+ (obj)->diag_ref, \
+ diag_event_text[(ev)], \
+ (uint64_t)(p1), \
+ (uint64_t)(p2)); \
+ } \
+} while (0)
+
+#define DIAG_COUNT_DEFINE(x) rte_atomic64_t count_##x
+#define DIAG_COUNT_INIT(o, x) rte_atomic64_init(&((o)->count_##x))
+#define DIAG_COUNT_INC(o, x) rte_atomic64_inc(&((o)->count_##x))
+#define DIAG_COUNT_DEC(o, x) rte_atomic64_dec(&((o)->count_##x))
+#define DIAG_COUNT(o, x) rte_atomic64_read(&((o)->count_##x))
+
+#define DIAG_USED
+
+#else
+
+/* no diagnostics configured */
+
+#define DISPLAY_OBJCACHE_QUEUES 0
+
+#define DIAG_CREATE_EVENT(obj, ev)
+#define DIAG_EVENT(obj, ev, p1, p2)
+
+#define DIAG_COUNT_DEFINE(x)
+#define DIAG_COUNT_INIT(o, x) do {} while (0)
+#define DIAG_COUNT_INC(o, x) do {} while (0)
+#define DIAG_COUNT_DEC(o, x) do {} while (0)
+#define DIAG_COUNT(o, x) 0
+
+#define DIAG_USED __rte_unused
+
+#endif /* LTHREAD_DIAG */
+#endif /* LTHREAD_DIAG_H_ */
diff --git a/examples/performance-thread/common/lthread_diag_api.h b/examples/performance-thread/common/lthread_diag_api.h
new file mode 100644
index 00000000..7ee514f8
--- /dev/null
+++ b/examples/performance-thread/common/lthread_diag_api.h
@@ -0,0 +1,325 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LTHREAD_DIAG_API_H_
+#define LTHREAD_DIAG_API_H_
+
+#include <stdint.h>
+#include <inttypes.h>
+
+/*
+ * Enable diagnostics
+ * 0 = conditionally compiled out
+ * 1 = compiled in and maskable at run time, see below for details
+ */
+#define LTHREAD_DIAG 0
+
+/**
+ *
+ * @file lthread_diag_api.h
+ *
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice
+ *
+ * lthread diagnostic interface
+ *
+ * If enabled via a configuration file option (tbd), the lthread subsystem
+ * can generate selected trace information, either RTE_LOG (INFO) messages,
+ * or else invoke a user-supplied callback function when any of the events
+ * listed below occur.
+ *
+ * Reporting of events can be selectively masked; the bit position in the
+ * mask is determined by the corresponding event identifier listed below.
+ *
+ * Diagnostics are enabled by registering the callback function and mask
+ * using the API lthread_diagnostic_enable().
+ *
+ * Various interesting parameters are passed to the callback, including the
+ * time in cpu clks, the lthread id, the diagnostic event id, a user ref value,
+ * the event text string, the object being traced, and two context-dependent
+ * parameters (p1 and p2). The meaning of p1 and p2 depends on the specific
+ * event.
+ *
+ * The events LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE and
+ * LT_DIAG_COND_CREATE are implicitly enabled if the event mask includes any of
+ * the LT_DIAG_LTHREAD_XXX, LT_DIAG_MUTEX_XXX or LT_DIAG_COND_XXX events
+ * respectively.
+ *
+ * These create events may also be included in the mask individually if it is
+ * desired to monitor only create events.
+ *
+ * @param time
+ * The time in cpu clks at which the event occurred
+ *
+ * @param lthread
+ * The current lthread
+ *
+ * @param diag_event
+ * The diagnostic event id (bit position in the mask)
+ *
+ * @param diag_ref
+ *
+ * For LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE or LT_DIAG_COND_CREATE
+ * this parameter is not used and is set to 0.
+ * For all other events diag_ref contains the user ref value returned by the
+ * callback function when the corresponding object was created.
+ *
+ * The diag_ref values assigned to mutex and cond var can be retrieved
+ * using the APIs lthread_mutex_diag_ref(), and lthread_cond_diag_ref()
+ * respectively.
+ *
+ * @param p1
+ * see below
+ *
+ * @param p2
+ * see below
+ *
+ * @returns
+ * For LT_DIAG_LTHREAD_CREATE, LT_DIAG_MUTEX_CREATE or LT_DIAG_COND_CREATE
+ * the callback is expected to return a user diagnostic ref value that will
+ * be saved in the lthread, mutex or cond var.
+ *
+ * For all other events return value is ignored.
+ *
+ * LT_DIAG_SCHED_CREATE - Invoked when a scheduler is created
+ * p1 = the scheduler that was created
+ * p2 = not used
+ * return value will be ignored
+ *
+ * LT_DIAG_SCHED_SHUTDOWN - Invoked when a shutdown request is received
+ * p1 = the scheduler to be shutdown
+ * p2 = not used
+ * return value will be ignored
+ *
+ * LT_DIAG_LTHREAD_CREATE - Invoked when a thread is created
+ * p1 = the lthread that was created
+ * p2 = not used
+ * return value will be stored in the lthread
+ *
+ * LT_DIAG_LTHREAD_EXIT - Invoked when a lthread exits
+ * p2 = 0 if the thread was already joined
+ * p2 = 1 if the thread was not already joined
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_JOIN - Invoked when an lthread is joined
+ * p1 = the lthread that is being joined
+ * p2 = 0 if the thread was already exited
+ * p2 = 1 if the thread was not already exited
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_CANCEL - Invoked when an lthread is cancelled
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_DETACH - Invoked when an lthread is detached
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_FREE - Invoked when an lthread is freed
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_SUSPENDED - Invoked when an lthread is suspended
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_YIELD - Invoked when an lthread explicitly yields
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_RESCHEDULED - Invoked when an lthread is rescheduled
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_RESUMED - Invoked when an lthread is resumed
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_AFFINITY - Invoked when an lthread is affinitised
+ * p1 = the destination lcore_id
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_TMR_START - Invoked when an lthread starts a timer
+ * p1 = address of timer node
+ * p2 = the timeout value
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_TMR_DELETE - Invoked when an lthread deletes a timer
+ * p1 = address of the timer node
+ *  p2 = 0 if the timer was successfully deleted, otherwise not used
+ * return val ignored
+ *
+ * LT_DIAG_LTHREAD_TMR_EXPIRED - Invoked when an lthread timer expires
+ * p1 = address of scheduler the timer expired on
+ * p2 = the thread associated with the timer
+ * return val ignored
+ *
+ * LT_DIAG_COND_CREATE - Invoked when a condition variable is created
+ * p1 = address of cond var that was created
+ * p2 = not used
+ * return diag ref value will be stored in the condition variable
+ *
+ * LT_DIAG_COND_DESTROY - Invoked when a condition variable is destroyed
+ * p1 = not used
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_COND_WAIT - Invoked when an lthread waits on a cond var
+ * p1 = the address of the condition variable
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_COND_SIGNAL - Invoked when an lthread signals a cond var
+ * p1 = the address of the cond var
+ * p2 = the lthread that was signalled, or error code
+ * return val ignored
+ *
+ * LT_DIAG_COND_BROADCAST - Invoked when an lthread broadcasts a cond var
+ * p1 = the address of the condition variable
+ * p2 = the lthread(s) that are signalled, or error code
+ *
+ * LT_DIAG_MUTEX_CREATE - Invoked when a mutex is created
+ *  p1 = address of mutex
+ * p2 = not used
+ * return diag ref value will be stored in the mutex variable
+ *
+ * LT_DIAG_MUTEX_DESTROY - Invoked when a mutex is destroyed
+ * p1 = address of mutex
+ * p2 = not used
+ * return val ignored
+ *
+ * LT_DIAG_MUTEX_LOCK - Invoked when a mutex lock is obtained
+ * p1 = address of mutex
+ * p2 = function return value
+ * return val ignored
+ *
+ * LT_DIAG_MUTEX_BLOCKED - Invoked when an lthread blocks on a mutex
+ * p1 = address of mutex
+ * p2 = function return value
+ * return val ignored
+ *
+ * LT_DIAG_MUTEX_TRYLOCK - Invoked when a mutex try lock is attempted
+ * p1 = address of mutex
+ * p2 = the function return value
+ * return val ignored
+ *
+ * LT_DIAG_MUTEX_UNLOCKED - Invoked when a mutex is unlocked
+ * p1 = address of mutex
+ * p2 = the thread that was unlocked, or error code
+ * return val ignored
+ */
+typedef uint64_t (*diag_callback) (uint64_t time, struct lthread *lt,
+ int diag_event, uint64_t diag_ref,
+ const char *text, uint64_t p1, uint64_t p2);
+
+/*
+ * Set user diagnostic callback and mask
+ * If the callback function pointer is NULL the default
+ * callback handler will be restored.
+ */
+void lthread_diagnostic_enable(diag_callback cb, uint64_t diag_mask);
+
+/*
+ * Set diagnostic mask
+ */
+void lthread_diagnostic_set_mask(uint64_t mask);
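+
+/*
+ * Illustrative usage sketch (the callback and call sequence below are
+ * hypothetical; only the declarations above are assumed): a minimal
+ * callback that prints every event and returns 0 as the diag ref value,
+ * registered for all events.
+ *
+ *   static uint64_t
+ *   example_diag_cb(uint64_t time, struct lthread *lt, int diag_event,
+ *                   uint64_t diag_ref, const char *text,
+ *                   uint64_t p1, uint64_t p2)
+ *   {
+ *       printf("%"PRIu64" %s lt=%p p1=%"PRIu64" p2=%"PRIu64"\n",
+ *              time, text, (void *)lt, p1, p2);
+ *       return 0;
+ *   }
+ *
+ *   lthread_diagnostic_enable(example_diag_cb, LT_DIAG_ALL);
+ */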
+
+/*
+ * lthread diagnostic events
+ */
+enum lthread_diag_ev {
+ /* bits 0 - 14 lthread flag group */
+ LT_DIAG_LTHREAD_CREATE, /* 00 mask 0x00000001 */
+ LT_DIAG_LTHREAD_EXIT, /* 01 mask 0x00000002 */
+ LT_DIAG_LTHREAD_JOIN, /* 02 mask 0x00000004 */
+ LT_DIAG_LTHREAD_CANCEL, /* 03 mask 0x00000008 */
+ LT_DIAG_LTHREAD_DETACH, /* 04 mask 0x00000010 */
+ LT_DIAG_LTHREAD_FREE, /* 05 mask 0x00000020 */
+ LT_DIAG_LTHREAD_SUSPENDED, /* 06 mask 0x00000040 */
+ LT_DIAG_LTHREAD_YIELD, /* 07 mask 0x00000080 */
+ LT_DIAG_LTHREAD_RESCHEDULED, /* 08 mask 0x00000100 */
+ LT_DIAG_LTHREAD_SLEEP, /* 09 mask 0x00000200 */
+ LT_DIAG_LTHREAD_RESUMED, /* 10 mask 0x00000400 */
+ LT_DIAG_LTHREAD_AFFINITY, /* 11 mask 0x00000800 */
+ LT_DIAG_LTHREAD_TMR_START, /* 12 mask 0x00001000 */
+ LT_DIAG_LTHREAD_TMR_DELETE, /* 13 mask 0x00002000 */
+ LT_DIAG_LTHREAD_TMR_EXPIRED, /* 14 mask 0x00004000 */
+ /* bits 15 - 19 conditional variable flag group */
+ LT_DIAG_COND_CREATE, /* 15 mask 0x00008000 */
+ LT_DIAG_COND_DESTROY, /* 16 mask 0x00010000 */
+ LT_DIAG_COND_WAIT, /* 17 mask 0x00020000 */
+ LT_DIAG_COND_SIGNAL, /* 18 mask 0x00040000 */
+ LT_DIAG_COND_BROADCAST, /* 19 mask 0x00080000 */
+ /* bits 20 - 25 mutex flag group */
+ LT_DIAG_MUTEX_CREATE, /* 20 mask 0x00100000 */
+ LT_DIAG_MUTEX_DESTROY, /* 21 mask 0x00200000 */
+ LT_DIAG_MUTEX_LOCK, /* 22 mask 0x00400000 */
+ LT_DIAG_MUTEX_TRYLOCK, /* 23 mask 0x00800000 */
+ LT_DIAG_MUTEX_BLOCKED, /* 24 mask 0x01000000 */
+ LT_DIAG_MUTEX_UNLOCKED, /* 25 mask 0x02000000 */
+	/* bits 26 - 27 scheduler flag group */
+ LT_DIAG_SCHED_CREATE, /* 26 mask 0x04000000 */
+ LT_DIAG_SCHED_SHUTDOWN, /* 27 mask 0x08000000 */
+ LT_DIAG_EVENT_MAX
+};
+
+#define LT_DIAG_ALL 0xffffffffffffffff
+
+
+/*
+ * Display scheduler stats
+ */
+void
+lthread_sched_stats_display(void);
+
+/*
+ * return the diagnostic ref val stored in a condition var
+ */
+uint64_t
+lthread_cond_diag_ref(struct lthread_cond *c);
+
+/*
+ * return the diagnostic ref val stored in a mutex
+ */
+uint64_t
+lthread_mutex_diag_ref(struct lthread_mutex *m);
+
+#endif /* LTHREAD_DIAG_API_H_ */
diff --git a/examples/performance-thread/common/lthread_int.h b/examples/performance-thread/common/lthread_int.h
new file mode 100644
index 00000000..c8357f4a
--- /dev/null
+++ b/examples/performance-thread/common/lthread_int.h
@@ -0,0 +1,212 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software may have been derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+#ifndef LTHREAD_INT_H
+#define LTHREAD_INT_H
+#include <lthread_api.h>
+
+#include <stdint.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <pthread.h>
+#include <time.h>
+
+#include <rte_cycles.h>
+#include <rte_per_lcore.h>
+#include <rte_timer.h>
+#include <rte_ring.h>
+#include <rte_atomic_64.h>
+#include <rte_spinlock.h>
+#include <ctx.h>
+
+#include <lthread_api.h>
+#include "lthread.h"
+#include "lthread_diag.h"
+#include "lthread_tls.h"
+
+struct lthread;
+struct lthread_sched;
+struct lthread_cond;
+struct lthread_mutex;
+struct lthread_key;
+
+struct key_pool;
+struct qnode;
+struct qnode_pool;
+struct lthread_sched;
+struct lthread_tls;
+
+
+#define BIT(x) (1 << (x))
+#define CLEARBIT(x) ~(1 << (x))
+
+#define POSIX_ERRNO(x) (x)
+
+#define MAX_LTHREAD_NAME_SIZE 64
+
+#define RTE_LOGTYPE_LTHREAD RTE_LOGTYPE_USER1
+
+
+/* define some shorthand for current scheduler and current thread */
+#define THIS_SCHED RTE_PER_LCORE(this_sched)
+#define THIS_LTHREAD RTE_PER_LCORE(this_sched)->current_lthread
+
+/*
+ * Definition of a scheduler struct
+ */
+struct lthread_sched {
+ struct ctx ctx; /* cpu context */
+ uint64_t birth; /* time created */
+ struct lthread *current_lthread; /* running thread */
+ unsigned lcore_id; /* this sched lcore */
+ int run_flag; /* sched shutdown */
+ uint64_t nb_blocked_threads; /* blocked threads */
+ struct lthread_queue *ready; /* local ready queue */
+ struct lthread_queue *pready; /* peer ready queue */
+ struct lthread_objcache *lthread_cache; /* free lthreads */
+ struct lthread_objcache *stack_cache; /* free stacks */
+ struct lthread_objcache *per_lthread_cache; /* free per lthread */
+ struct lthread_objcache *tls_cache; /* free TLS */
+ struct lthread_objcache *cond_cache; /* free cond vars */
+ struct lthread_objcache *mutex_cache; /* free mutexes */
+ struct qnode_pool *qnode_pool; /* pool of queue nodes */
+ struct key_pool *key_pool; /* pool of free TLS keys */
+ size_t stack_size;
+ uint64_t diag_ref; /* diag ref */
+} __rte_cache_aligned;
+
+RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched);
+
+
+/*
+ * State for an lthread
+ */
+enum lthread_st {
+ ST_LT_INIT, /* initial state */
+ ST_LT_READY, /* lthread is ready to run */
+ ST_LT_SLEEPING, /* lthread is sleeping */
+ ST_LT_EXPIRED, /* lthread timeout has expired */
+ ST_LT_EXITED, /* lthread has exited and needs cleanup */
+ ST_LT_DETACH, /* lthread frees on exit*/
+ ST_LT_CANCELLED, /* lthread has been cancelled */
+};
+
+/*
+ * lthread sub states for exit/join
+ */
+enum join_st {
+ LT_JOIN_INITIAL, /* initial state */
+ LT_JOIN_EXITING, /* thread is exiting */
+ LT_JOIN_THREAD_SET, /* joining thread has been set */
+ LT_JOIN_EXIT_VAL_SET, /* exiting thread has set ret val */
+ LT_JOIN_EXIT_VAL_READ, /* joining thread has collected ret val */
+};
+
+/* definition of an lthread stack object */
+struct lthread_stack {
+ uint8_t stack[LTHREAD_MAX_STACK_SIZE];
+ size_t stack_size;
+ struct lthread_sched *root_sched;
+} __rte_cache_aligned;
+
+/*
+ * Definition of an lthread
+ */
+struct lthread {
+ struct ctx ctx; /* cpu context */
+
+ uint64_t state; /* current lthread state */
+
+ struct lthread_sched *sched; /* current scheduler */
+ void *stack; /* ptr to actual stack */
+ size_t stack_size; /* current stack_size */
+ size_t last_stack_size; /* last yield stack_size */
+ lthread_func_t fun; /* func ctx is running */
+ void *arg; /* func args passed to func */
+ void *per_lthread_data; /* per lthread user data */
+ lthread_exit_func exit_handler; /* called when thread exits */
+ uint64_t birth; /* time lthread was born */
+ struct lthread_queue *pending_wr_queue; /* deferred queue to write */
+ struct lthread *lt_join; /* lthread to join on */
+ uint64_t join; /* state for joining */
+ void **lt_exit_ptr; /* exit ptr for lthread_join */
+ struct lthread_sched *root_sched; /* thread was created here*/
+ struct queue_node *qnode; /* node when in a queue */
+ struct rte_timer tim; /* sleep timer */
+ struct lthread_tls *tls; /* keys in use by the thread */
+ struct lthread_stack *stack_container; /* stack */
+ char funcname[MAX_LTHREAD_NAME_SIZE]; /* thread func name */
+ uint64_t diag_ref; /* ref to user diag data */
+} __rte_cache_aligned;
+
+/*
+ * Assert
+ */
+#if LTHREAD_DIAG
+#define LTHREAD_ASSERT(expr) do { \
+ if (!(expr)) \
+ rte_panic("line%d\tassert \"" #expr "\" failed\n", __LINE__);\
+} while (0)
+#else
+#define LTHREAD_ASSERT(expr) do {} while (0)
+#endif
+
+#endif /* LTHREAD_INT_H */
diff --git a/examples/performance-thread/common/lthread_mutex.c b/examples/performance-thread/common/lthread_mutex.c
new file mode 100644
index 00000000..af8b82d2
--- /dev/null
+++ b/examples/performance-thread/common/lthread_mutex.c
@@ -0,0 +1,254 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+
+#include <rte_per_lcore.h>
+#include <rte_log.h>
+#include <rte_spinlock.h>
+#include <rte_common.h>
+
+#include "lthread_api.h"
+#include "lthread_int.h"
+#include "lthread_mutex.h"
+#include "lthread_sched.h"
+#include "lthread_queue.h"
+#include "lthread_objcache.h"
+#include "lthread_diag.h"
+
+/*
+ * Create a mutex
+ */
+int
+lthread_mutex_init(char *name, struct lthread_mutex **mutex,
+ __rte_unused const struct lthread_mutexattr *attr)
+{
+ struct lthread_mutex *m;
+
+ if (mutex == NULL)
+ return POSIX_ERRNO(EINVAL);
+
+
+ m = _lthread_objcache_alloc((THIS_SCHED)->mutex_cache);
+ if (m == NULL)
+ return POSIX_ERRNO(EAGAIN);
+
+ m->blocked = _lthread_queue_create("blocked queue");
+ if (m->blocked == NULL) {
+ _lthread_objcache_free((THIS_SCHED)->mutex_cache, m);
+ return POSIX_ERRNO(EAGAIN);
+ }
+
+ if (name == NULL)
+ strncpy(m->name, "no name", sizeof(m->name));
+ else
+ strncpy(m->name, name, sizeof(m->name));
+ m->name[sizeof(m->name)-1] = 0;
+
+ m->root_sched = THIS_SCHED;
+ m->owner = NULL;
+
+ rte_atomic64_init(&m->count);
+
+ DIAG_CREATE_EVENT(m, LT_DIAG_MUTEX_CREATE);
+ /* success */
+ (*mutex) = m;
+ return 0;
+}
+
+/*
+ * Destroy a mutex
+ */
+int lthread_mutex_destroy(struct lthread_mutex *m)
+{
+ if ((m == NULL) || (m->blocked == NULL)) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ if (m->owner == NULL) {
+ /* try to delete the blocked queue */
+ if (_lthread_queue_destroy(m->blocked) < 0) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY,
+ m, POSIX_ERRNO(EBUSY));
+ return POSIX_ERRNO(EBUSY);
+ }
+
+ /* free the mutex to cache */
+ _lthread_objcache_free(m->root_sched->mutex_cache, m);
+ DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, 0);
+ return 0;
+ }
+	/* can't destroy it, it's still in use */
+ DIAG_EVENT(m, LT_DIAG_MUTEX_DESTROY, m, POSIX_ERRNO(EBUSY));
+ return POSIX_ERRNO(EBUSY);
+}
+
+/*
+ * Try to obtain a mutex
+ */
+int lthread_mutex_lock(struct lthread_mutex *m)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ if ((m == NULL) || (m->blocked == NULL)) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ /* allow no recursion */
+ if (m->owner == lt) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, POSIX_ERRNO(EDEADLK));
+ return POSIX_ERRNO(EDEADLK);
+ }
+
+ for (;;) {
+ rte_atomic64_inc(&m->count);
+ do {
+ if (rte_atomic64_cmpset
+ ((uint64_t *) &m->owner, 0, (uint64_t) lt)) {
+ /* happy days, we got the lock */
+ DIAG_EVENT(m, LT_DIAG_MUTEX_LOCK, m, 0);
+ return 0;
+ }
+ /* spin due to race with unlock when
+ * nothing was blocked
+ */
+ } while ((rte_atomic64_read(&m->count) == 1) &&
+ (m->owner == NULL));
+
+ /* queue the current thread in the blocked queue
+ * we defer this to after we return to the scheduler
+ * to ensure that the current thread context is saved
+ * before unlock could result in it being dequeued and
+ * resumed
+ */
+ DIAG_EVENT(m, LT_DIAG_MUTEX_BLOCKED, m, lt);
+ lt->pending_wr_queue = m->blocked;
+ /* now relinquish cpu */
+ _suspend();
+ /* resumed, must loop and compete for the lock again */
+ }
+ LTHREAD_ASSERT(0);
+ return 0;
+}
+
+/* try to lock a mutex but don't block */
+int lthread_mutex_trylock(struct lthread_mutex *m)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ if ((m == NULL) || (m->blocked == NULL)) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+ if (m->owner == lt) {
+ /* no recursion */
+ DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EDEADLK));
+ return POSIX_ERRNO(EDEADLK);
+ }
+
+ rte_atomic64_inc(&m->count);
+ if (rte_atomic64_cmpset
+ ((uint64_t *) &m->owner, (uint64_t) NULL, (uint64_t) lt)) {
+ /* got the lock */
+ DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, 0);
+ return 0;
+ }
+
+ /* failed so return busy */
+ rte_atomic64_dec(&m->count);
+ DIAG_EVENT(m, LT_DIAG_MUTEX_TRYLOCK, m, POSIX_ERRNO(EBUSY));
+ return POSIX_ERRNO(EBUSY);
+}
+
+/*
+ * Unlock a mutex
+ */
+int lthread_mutex_unlock(struct lthread_mutex *m)
+{
+ struct lthread *lt = THIS_LTHREAD;
+ struct lthread *unblocked;
+
+ if ((m == NULL) || (m->blocked == NULL)) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, POSIX_ERRNO(EINVAL));
+ return POSIX_ERRNO(EINVAL);
+ }
+
+	/* fail if the caller does not own the mutex */
+ if (m->owner != lt || m->owner == NULL) {
+ DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, POSIX_ERRNO(EPERM));
+ return POSIX_ERRNO(EPERM);
+ }
+
+ rte_atomic64_dec(&m->count);
+ /* if there are blocked threads then make one ready */
+ while (rte_atomic64_read(&m->count) > 0) {
+ unblocked = _lthread_queue_remove(m->blocked);
+
+ if (unblocked != NULL) {
+ rte_atomic64_dec(&m->count);
+ DIAG_EVENT(m, LT_DIAG_MUTEX_UNLOCKED, m, unblocked);
+ LTHREAD_ASSERT(unblocked->sched != NULL);
+ _ready_queue_insert((struct lthread_sched *)
+ unblocked->sched, unblocked);
+ break;
+ }
+ }
+ /* release the lock */
+ m->owner = NULL;
+ return 0;
+}
+
+/*
+ * return the diagnostic ref val stored in a mutex
+ */
+uint64_t
+lthread_mutex_diag_ref(struct lthread_mutex *m)
+{
+ if (m == NULL)
+ return 0;
+ return m->diag_ref;
+}
diff --git a/examples/performance-thread/common/lthread_mutex.h b/examples/performance-thread/common/lthread_mutex.h
new file mode 100644
index 00000000..4d30b2e7
--- /dev/null
+++ b/examples/performance-thread/common/lthread_mutex.h
@@ -0,0 +1,52 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef LTHREAD_MUTEX_H_
+#define LTHREAD_MUTEX_H_
+
+#include "lthread_queue.h"
+
+
+#define MAX_MUTEX_NAME_SIZE 64
+
+struct lthread_mutex {
+ struct lthread *owner;
+ rte_atomic64_t count;
+ struct lthread_queue *blocked __rte_cache_aligned;
+ struct lthread_sched *root_sched;
+ char name[MAX_MUTEX_NAME_SIZE];
+ uint64_t diag_ref; /* optional ref to user diag data */
+} __rte_cache_aligned;
+
+#endif /* LTHREAD_MUTEX_H_ */
diff --git a/examples/performance-thread/common/lthread_objcache.h b/examples/performance-thread/common/lthread_objcache.h
new file mode 100644
index 00000000..d7e35825
--- /dev/null
+++ b/examples/performance-thread/common/lthread_objcache.h
@@ -0,0 +1,158 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef LTHREAD_OBJCACHE_H_
+#define LTHREAD_OBJCACHE_H_
+
+#include <string.h>
+
+#include <rte_per_lcore.h>
+#include <rte_malloc.h>
+#include <rte_memory.h>
+
+#include "lthread_int.h"
+#include "lthread_diag.h"
+#include "lthread_queue.h"
+
+
+RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched);
+
+struct lthread_objcache {
+ struct lthread_queue *q;
+ size_t obj_size;
+ int prealloc_size;
+ char name[LT_MAX_NAME_SIZE];
+
+ DIAG_COUNT_DEFINE(rd);
+ DIAG_COUNT_DEFINE(wr);
+ DIAG_COUNT_DEFINE(prealloc);
+ DIAG_COUNT_DEFINE(capacity);
+ DIAG_COUNT_DEFINE(available);
+};
+
+/*
+ * Create a cache
+ */
+static inline struct lthread_objcache *
+_lthread_objcache_create(const char *name,
+ size_t obj_size,
+ int prealloc_size)
+{
+ struct lthread_objcache *c =
+ rte_malloc_socket(NULL, sizeof(struct lthread_objcache),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+ if (c == NULL)
+ return NULL;
+
+ c->q = _lthread_queue_create("cache queue");
+ if (c->q == NULL) {
+ rte_free(c);
+ return NULL;
+ }
+ c->obj_size = obj_size;
+ c->prealloc_size = prealloc_size;
+
+ if (name != NULL)
+ strncpy(c->name, name, LT_MAX_NAME_SIZE);
+ c->name[sizeof(c->name)-1] = 0;
+
+ DIAG_COUNT_INIT(c, rd);
+ DIAG_COUNT_INIT(c, wr);
+ DIAG_COUNT_INIT(c, prealloc);
+ DIAG_COUNT_INIT(c, capacity);
+ DIAG_COUNT_INIT(c, available);
+ return c;
+}
+
+/*
+ * Destroy an objcache
+ */
+static inline int
+_lthread_objcache_destroy(struct lthread_objcache *c)
+{
+ if (_lthread_queue_destroy(c->q) == 0) {
+ rte_free(c);
+ return 0;
+ }
+ return -1;
+}
+
+/*
+ * Allocate an object from an object cache
+ */
+static inline void *
+_lthread_objcache_alloc(struct lthread_objcache *c)
+{
+ int i;
+ void *data;
+ struct lthread_queue *q = c->q;
+ size_t obj_size = c->obj_size;
+ int prealloc_size = c->prealloc_size;
+
+ data = _lthread_queue_remove(q);
+
+ if (data == NULL) {
+ DIAG_COUNT_INC(c, prealloc);
+ for (i = 0; i < prealloc_size; i++) {
+ data =
+ rte_zmalloc_socket(NULL, obj_size,
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+ if (data == NULL)
+ return NULL;
+
+ DIAG_COUNT_INC(c, available);
+ DIAG_COUNT_INC(c, capacity);
+ _lthread_queue_insert_mp(q, data);
+ }
+ data = _lthread_queue_remove(q);
+ }
+ DIAG_COUNT_INC(c, rd);
+ DIAG_COUNT_DEC(c, available);
+ return data;
+}
+
+/*
+ * free an object to a cache
+ */
+static inline void
+_lthread_objcache_free(struct lthread_objcache *c, void *obj)
+{
+ DIAG_COUNT_INC(c, wr);
+ DIAG_COUNT_INC(c, available);
+ _lthread_queue_insert_mp(c->q, obj);
+}
+
+
+
+#endif /* LTHREAD_OBJCACHE_H_ */
diff --git a/examples/performance-thread/common/lthread_pool.h b/examples/performance-thread/common/lthread_pool.h
new file mode 100644
index 00000000..a5f32515
--- /dev/null
+++ b/examples/performance-thread/common/lthread_pool.h
@@ -0,0 +1,332 @@
+/*
+ *-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from the producer
+ * consumer queues described by Dmitry Vyukov and published here
+ * http://www.1024cores.net
+ *
+ * Copyright (c) 2010-2011 Dmitry Vyukov. All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY DMITRY VYUKOV "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DMITRY VYUKOV OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of Dmitry Vyukov.
+ */
+
+#ifndef LTHREAD_POOL_H_
+#define LTHREAD_POOL_H_
+
+#include <rte_malloc.h>
+#include <rte_per_lcore.h>
+#include <rte_log.h>
+
+#include "lthread_int.h"
+#include "lthread_diag.h"
+
+/*
+ * This file implements pool of queue nodes used by the queue implemented
+ * in lthread_queue.h.
+ *
+ * The pool is an intrusive lock free MPSC queue.
+ *
+ * The pool is created empty and populated lazily, i.e. on the first attempt
+ * to allocate from the pool.
+ *
+ * Whenever the pool is empty more nodes are added to it; the number of nodes
+ * preallocated in this way is a parameter of _qnode_pool_create().
+ * Freeing an object returns it to the pool.
+ *
+ * Each lthread scheduler maintains its own pool of nodes. L-threads must
+ * always allocate from this local pool (because it is a single-consumer
+ * queue), but may free nodes to any pool (because it is a multi-producer
+ * queue). This enables threads that have been affined to a different
+ * scheduler to free nodes safely.
+ */
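+
+/*
+ * Illustrative sketch of how the pool is used (variable names are
+ * hypothetical; see lthread_queue.h for the real call sites):
+ *
+ *   struct qnode_pool *p = _qnode_pool_create("example", LTHREAD_PREALLOC);
+ *   struct qnode *n = _qnode_alloc();   // from the current scheduler's pool
+ *   ...
+ *   _qnode_free(n);                     // may be called from any scheduler
+ */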
+
+struct qnode;
+struct qnode_cache;
+
+/*
+ * define intermediate node
+ */
+struct qnode {
+ struct qnode *next;
+ void *data;
+ struct qnode_pool *pool;
+} __rte_cache_aligned;
+
+/*
+ * a pool structure
+ */
+struct qnode_pool {
+ struct qnode *head;
+ struct qnode *stub;
+ struct qnode *fast_alloc;
+ struct qnode *tail __rte_cache_aligned;
+ int pre_alloc;
+ char name[LT_MAX_NAME_SIZE];
+
+ DIAG_COUNT_DEFINE(rd);
+ DIAG_COUNT_DEFINE(wr);
+ DIAG_COUNT_DEFINE(available);
+ DIAG_COUNT_DEFINE(prealloc);
+ DIAG_COUNT_DEFINE(capacity);
+} __rte_cache_aligned;
+
+/*
+ * Create a pool of qnodes
+ */
+
+static inline struct qnode_pool *
+_qnode_pool_create(const char *name, int prealloc_size) {
+
+ struct qnode_pool *p = rte_malloc_socket(NULL,
+ sizeof(struct qnode_pool),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+
+ LTHREAD_ASSERT(p);
+
+ p->stub = rte_malloc_socket(NULL,
+ sizeof(struct qnode),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+
+ LTHREAD_ASSERT(p->stub);
+
+ if (name != NULL)
+ strncpy(p->name, name, LT_MAX_NAME_SIZE);
+ p->name[sizeof(p->name)-1] = 0;
+
+ p->stub->pool = p;
+ p->stub->next = NULL;
+ p->tail = p->stub;
+ p->head = p->stub;
+ p->pre_alloc = prealloc_size;
+
+ DIAG_COUNT_INIT(p, rd);
+ DIAG_COUNT_INIT(p, wr);
+ DIAG_COUNT_INIT(p, available);
+ DIAG_COUNT_INIT(p, prealloc);
+ DIAG_COUNT_INIT(p, capacity);
+
+ return p;
+}
+
+
+/*
+ * Insert a node into the pool
+ */
+static inline void __attribute__ ((always_inline))
+_qnode_pool_insert(struct qnode_pool *p, struct qnode *n)
+{
+ n->next = NULL;
+ struct qnode *prev = n;
+ /* We insert at the head */
+ prev = (struct qnode *) __sync_lock_test_and_set((uint64_t *)&p->head,
+ (uint64_t) prev);
+ /* there is a window of inconsistency until prev next is set */
+ /* which is why remove must retry */
+ prev->next = (n);
+}
+
+/*
+ * Remove a node from the pool
+ *
+ * There is a race with _qnode_pool_insert() whereby the queue could appear
+ * empty during a concurrent insert, this is handled by retrying
+ *
+ * The queue uses a stub node, which must be swung as the queue becomes
+ * empty; this requires an insert of the stub, which means that removing the
+ * last item from the queue incurs the penalty of an atomic exchange. Since
+ * the pool is maintained with a bulk pre-allocation the cost of this is
+ * amortised.
+ */
+static inline struct qnode *__attribute__ ((always_inline))
+_pool_remove(struct qnode_pool *p)
+{
+ struct qnode *head;
+ struct qnode *tail = p->tail;
+ struct qnode *next = tail->next;
+
+ /* we remove from the tail */
+ if (tail == p->stub) {
+ if (next == NULL)
+ return NULL;
+ /* advance the tail */
+ p->tail = next;
+ tail = next;
+ next = next->next;
+ }
+ if (likely(next != NULL)) {
+ p->tail = next;
+ return tail;
+ }
+
+ head = p->head;
+ if (tail == head)
+ return NULL;
+
+ /* swing stub node */
+ _qnode_pool_insert(p, p->stub);
+
+ next = tail->next;
+ if (next) {
+ p->tail = next;
+ return tail;
+ }
+ return NULL;
+}
+
+
+/*
+ * This adds a retry to the _pool_remove function
+ * defined above
+ */
+static inline struct qnode *__attribute__ ((always_inline))
+_qnode_pool_remove(struct qnode_pool *p)
+{
+ struct qnode *n;
+
+ do {
+ n = _pool_remove(p);
+ if (likely(n != NULL))
+ return n;
+
+ rte_compiler_barrier();
+ } while ((p->head != p->tail) &&
+ (p->tail != p->stub));
+ return NULL;
+}
+
+/*
+ * Allocate a node from the pool
+ * If the pool is empty, add more nodes
+ */
+static inline struct qnode *__attribute__ ((always_inline))
+_qnode_alloc(void)
+{
+ struct qnode_pool *p = (THIS_SCHED)->qnode_pool;
+ int prealloc_size = p->pre_alloc;
+ struct qnode *n;
+ int i;
+
+ if (likely(p->fast_alloc != NULL)) {
+ n = p->fast_alloc;
+ p->fast_alloc = NULL;
+ return n;
+ }
+
+ n = _qnode_pool_remove(p);
+
+ if (unlikely(n == NULL)) {
+ DIAG_COUNT_INC(p, prealloc);
+ for (i = 0; i < prealloc_size; i++) {
+ n = rte_malloc_socket(NULL,
+ sizeof(struct qnode),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+ if (n == NULL)
+ return NULL;
+
+ DIAG_COUNT_INC(p, available);
+ DIAG_COUNT_INC(p, capacity);
+
+ n->pool = p;
+ _qnode_pool_insert(p, n);
+ }
+ n = _qnode_pool_remove(p);
+ }
+ n->pool = p;
+ DIAG_COUNT_INC(p, rd);
+ DIAG_COUNT_DEC(p, available);
+ return n;
+}
+
+
+
+/*
+ * free a queue node to the per-scheduler pool from which it came
+ */
+static inline void __attribute__ ((always_inline))
+_qnode_free(struct qnode *n)
+{
+ struct qnode_pool *p = n->pool;
+
+
+ if (unlikely(p->fast_alloc != NULL) ||
+ unlikely(n->pool != (THIS_SCHED)->qnode_pool)) {
+ DIAG_COUNT_INC(p, wr);
+ DIAG_COUNT_INC(p, available);
+ _qnode_pool_insert(p, n);
+ return;
+ }
+ p->fast_alloc = n;
+}
+
+/*
+ * Destroy a qnode pool
+ * the pool must be empty when this is called
+ */
+static inline int
+_qnode_pool_destroy(struct qnode_pool *p)
+{
+ rte_free(p->stub);
+ rte_free(p);
+ return 0;
+}
+
+
+#endif /* LTHREAD_POOL_H_ */
diff --git a/examples/performance-thread/common/lthread_queue.h b/examples/performance-thread/common/lthread_queue.h
new file mode 100644
index 00000000..0c395167
--- /dev/null
+++ b/examples/performance-thread/common/lthread_queue.h
@@ -0,0 +1,302 @@
+/*
+ *-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from the producer
+ * consumer queues described by Dmitry Vyukov and published here
+ * http://www.1024cores.net
+ *
+ * Copyright (c) 2010-2011 Dmitry Vyukov. All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY DMITRY VYUKOV "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL DMITRY VYUKOV OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY,
+ * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
+ * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation are
+ * those of the authors and should not be interpreted as representing official
+ * policies, either expressed or implied, of Dmitry Vyukov.
+ */
+
+#ifndef LTHREAD_QUEUE_H_
+#define LTHREAD_QUEUE_H_
+
+#include <string.h>
+
+#include <rte_prefetch.h>
+#include <rte_per_lcore.h>
+
+#include "lthread_int.h"
+#include "lthread.h"
+#include "lthread_diag.h"
+#include "lthread_pool.h"
+
+struct lthread_queue;
+
+/*
+ * This file implements an unbounded FIFO queue based on a lock free
+ * linked list.
+ *
+ * The queue is non-intrusive in that it uses intermediate nodes, and does
+ * not require these nodes to be inserted into the object being placed
+ * in the queue.
+ *
+ * This is slightly more efficient than the very similar queue in lthread_pool
+ * in that it does not have to swing a stub node as the queue becomes empty.
+ *
+ * The queue access functions allocate and free intermediate nodes
+ * transparently from/to a per-scheduler pool (see lthread_pool.h).
+ *
+ * The queue provides both MPSC and SPSC insert methods
+ */
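+
+/*
+ * Illustrative sketch of the queue API defined below (variable names are
+ * hypothetical):
+ *
+ *   struct lthread_queue *q = _lthread_queue_create("example");
+ *
+ *   _lthread_queue_insert_mp(q, obj);      // safe from multiple producers
+ *   void *o = _lthread_queue_remove(q);    // single consumer only
+ *
+ *   _lthread_queue_destroy(q);             // fails unless the queue is empty
+ */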
+
+/*
+ * define a queue of lthread nodes
+ */
+struct lthread_queue {
+ struct qnode *head;
+ struct qnode *tail __rte_cache_aligned;
+ struct lthread_queue *p;
+ char name[LT_MAX_NAME_SIZE];
+
+ DIAG_COUNT_DEFINE(rd);
+ DIAG_COUNT_DEFINE(wr);
+ DIAG_COUNT_DEFINE(size);
+
+} __rte_cache_aligned;
+
+
+
+static inline struct lthread_queue *
+_lthread_queue_create(const char *name)
+{
+ struct qnode *stub;
+ struct lthread_queue *new_queue;
+
+ new_queue = rte_malloc_socket(NULL, sizeof(struct lthread_queue),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+ if (new_queue == NULL)
+ return NULL;
+
+	/* allocate the stub node */
+ stub = _qnode_alloc();
+ LTHREAD_ASSERT(stub);
+
+ if (name != NULL)
+ strncpy(new_queue->name, name, sizeof(new_queue->name));
+ new_queue->name[sizeof(new_queue->name)-1] = 0;
+
+ /* initialize queue as empty */
+ stub->next = NULL;
+ new_queue->head = stub;
+ new_queue->tail = stub;
+
+ DIAG_COUNT_INIT(new_queue, rd);
+ DIAG_COUNT_INIT(new_queue, wr);
+ DIAG_COUNT_INIT(new_queue, size);
+
+ return new_queue;
+}
+
+/**
+ * Return true if the queue is empty
+ */
+static inline int __attribute__ ((always_inline))
+_lthread_queue_empty(struct lthread_queue *q)
+{
+ return q->tail == q->head;
+}
+
+
+
+/**
+ * Destroy a queue
+ * fails if the queue is not empty
+ */
+static inline int _lthread_queue_destroy(struct lthread_queue *q)
+{
+ if (q == NULL)
+ return -1;
+
+ if (!_lthread_queue_empty(q))
+ return -1;
+
+ _qnode_free(q->head);
+ rte_free(q);
+ return 0;
+}
+
+RTE_DECLARE_PER_LCORE(struct lthread_sched *, this_sched);
+
+/*
+ * Insert a node into a queue
+ * this implementation is multi-producer safe
+ */
+static inline struct qnode *__attribute__ ((always_inline))
+_lthread_queue_insert_mp(struct lthread_queue
+ *q, void *data)
+{
+ struct qnode *prev;
+ struct qnode *n = _qnode_alloc();
+
+ if (n == NULL)
+ return NULL;
+
+ /* set object in node */
+ n->data = data;
+ n->next = NULL;
+
+ /* this is an MPSC method, perform a locked update */
+ prev = n;
+ prev =
+ (struct qnode *)__sync_lock_test_and_set((uint64_t *) &(q)->head,
+ (uint64_t) prev);
+ /* there is a window of inconsistency until prev next is set,
+ * which is why remove must retry
+ */
+ prev->next = n;
+
+ DIAG_COUNT_INC(q, wr);
+ DIAG_COUNT_INC(q, size);
+
+ return n;
+}
+
+/*
+ * Insert a node into a queue in single-producer mode
+ * this implementation is NOT multi-producer safe
+ */
+static inline struct qnode *__attribute__ ((always_inline))
+_lthread_queue_insert_sp(struct lthread_queue
+ *q, void *data)
+{
+ /* allocate a queue node */
+ struct qnode *prev;
+ struct qnode *n = _qnode_alloc();
+
+ if (n == NULL)
+ return NULL;
+
+ /* set data in node */
+ n->data = data;
+ n->next = NULL;
+
+ /* this is an SPSC method, no need for locked exchange operation */
+ prev = q->head;
+ prev->next = q->head = n;
+
+ DIAG_COUNT_INC(q, wr);
+ DIAG_COUNT_INC(q, size);
+
+ return n;
+}
+
+/*
+ * Remove a node from a queue
+ */
+static inline void *__attribute__ ((always_inline))
+_lthread_queue_poll(struct lthread_queue *q)
+{
+ void *data = NULL;
+ struct qnode *tail = q->tail;
+ struct qnode *next = (struct qnode *)tail->next;
+ /*
+ * There is a small window of inconsistency between producer and
+ * consumer whereby the queue may appear empty if consumer and
+ * producer access it at the same time.
+ * The consumer must handle this by retrying
+ */
+
+ if (likely(next != NULL)) {
+ q->tail = next;
+ tail->data = next->data;
+ data = tail->data;
+
+ /* free the node */
+ _qnode_free(tail);
+
+ DIAG_COUNT_INC(q, rd);
+ DIAG_COUNT_DEC(q, size);
+ return data;
+ }
+ return NULL;
+}
+
+/*
+ * Remove a node from a queue
+ */
+static inline void *__attribute__ ((always_inline))
+_lthread_queue_remove(struct lthread_queue *q)
+{
+ void *data = NULL;
+
+ /*
+ * There is a small window of inconsistency between producer and
+ * consumer whereby the queue may appear empty if consumer and
+ * producer access it at the same time. We handle this by retrying
+ */
+ do {
+ data = _lthread_queue_poll(q);
+
+ if (likely(data != NULL)) {
+
+ DIAG_COUNT_INC(q, rd);
+ DIAG_COUNT_DEC(q, size);
+ return data;
+ }
+ rte_compiler_barrier();
+ } while (unlikely(!_lthread_queue_empty(q)));
+ return NULL;
+}
+
+
+#endif /* LTHREAD_QUEUE_H_ */
diff --git a/examples/performance-thread/common/lthread_sched.c b/examples/performance-thread/common/lthread_sched.c
new file mode 100644
index 00000000..7c40bc05
--- /dev/null
+++ b/examples/performance-thread/common/lthread_sched.c
@@ -0,0 +1,599 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from
+ * https://github.com/halayli/lthread which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+
+#define RTE_MEM 1
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stddef.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sched.h>
+
+#include <rte_prefetch.h>
+#include <rte_per_lcore.h>
+#include <rte_atomic.h>
+#include <rte_atomic_64.h>
+#include <rte_log.h>
+#include <rte_common.h>
+#include <rte_branch_prediction.h>
+
+#include "lthread_api.h"
+#include "lthread_int.h"
+#include "lthread_sched.h"
+#include "lthread_objcache.h"
+#include "lthread_timer.h"
+#include "lthread_mutex.h"
+#include "lthread_cond.h"
+#include "lthread_tls.h"
+#include "lthread_diag.h"
+
+/*
+ * This file implements the lthread scheduler.
+ * The scheduler is the function lthread_run(), which must be run as the
+ * main loop of an EAL thread.
+ *
+ * Currently, once a scheduler is created it cannot be destroyed. When a
+ * scheduler shuts down it is assumed that the application is terminating.
+ */
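+
+/*
+ * Illustrative sketch of how an application drives the scheduler, assuming
+ * the public lthread_api.h interface (the function and variable names in
+ * the sketch are hypothetical):
+ *
+ *   static int sched_loop(__rte_unused void *arg)
+ *   {
+ *       struct lthread *lt;
+ *
+ *       // spawn an initial lthread on this lcore, then run its scheduler
+ *       lthread_create(&lt, -1, initial_lthread_fn, NULL);
+ *       lthread_run();
+ *       return 0;
+ *   }
+ *
+ *   // from main(): rte_eal_mp_remote_launch(sched_loop, NULL, CALL_MASTER);
+ */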
+
+static rte_atomic16_t num_schedulers;
+static rte_atomic16_t active_schedulers;
+
+/* one scheduler per lcore */
+RTE_DEFINE_PER_LCORE(struct lthread_sched *, this_sched) = NULL;
+
+struct lthread_sched *schedcore[LTHREAD_MAX_LCORES];
+
+diag_callback diag_cb;
+
+uint64_t diag_mask;
+
+
+/* constructor */
+void lthread_sched_ctor(void) __attribute__ ((constructor));
+void lthread_sched_ctor(void)
+{
+ memset(schedcore, 0, sizeof(schedcore));
+ rte_atomic16_init(&num_schedulers);
+ rte_atomic16_set(&num_schedulers, 1);
+ rte_atomic16_init(&active_schedulers);
+ rte_atomic16_set(&active_schedulers, 0);
+ diag_cb = NULL;
+}
+
+
+enum sched_alloc_phase {
+ SCHED_ALLOC_OK,
+ SCHED_ALLOC_QNODE_POOL,
+ SCHED_ALLOC_READY_QUEUE,
+ SCHED_ALLOC_PREADY_QUEUE,
+ SCHED_ALLOC_LTHREAD_CACHE,
+ SCHED_ALLOC_STACK_CACHE,
+ SCHED_ALLOC_PERLT_CACHE,
+ SCHED_ALLOC_TLS_CACHE,
+ SCHED_ALLOC_COND_CACHE,
+ SCHED_ALLOC_MUTEX_CACHE,
+};
+
+static int
+_lthread_sched_alloc_resources(struct lthread_sched *new_sched)
+{
+ int alloc_status;
+
+ do {
+ /* Initialize per scheduler queue node pool */
+ alloc_status = SCHED_ALLOC_QNODE_POOL;
+ new_sched->qnode_pool =
+ _qnode_pool_create("qnode pool", LTHREAD_PREALLOC);
+ if (new_sched->qnode_pool == NULL)
+ break;
+
+ /* Initialize per scheduler local ready queue */
+ alloc_status = SCHED_ALLOC_READY_QUEUE;
+ new_sched->ready = _lthread_queue_create("ready queue");
+ if (new_sched->ready == NULL)
+ break;
+
+ /* Initialize per scheduler local peer ready queue */
+ alloc_status = SCHED_ALLOC_PREADY_QUEUE;
+ new_sched->pready = _lthread_queue_create("pready queue");
+ if (new_sched->pready == NULL)
+ break;
+
+ /* Initialize per scheduler local free lthread cache */
+ alloc_status = SCHED_ALLOC_LTHREAD_CACHE;
+ new_sched->lthread_cache =
+ _lthread_objcache_create("lthread cache",
+ sizeof(struct lthread),
+ LTHREAD_PREALLOC);
+ if (new_sched->lthread_cache == NULL)
+ break;
+
+ /* Initialize per scheduler local free stack cache */
+ alloc_status = SCHED_ALLOC_STACK_CACHE;
+ new_sched->stack_cache =
+ _lthread_objcache_create("stack_cache",
+ sizeof(struct lthread_stack),
+ LTHREAD_PREALLOC);
+ if (new_sched->stack_cache == NULL)
+ break;
+
+ /* Initialize per scheduler local free per lthread data cache */
+ alloc_status = SCHED_ALLOC_PERLT_CACHE;
+ new_sched->per_lthread_cache =
+ _lthread_objcache_create("per_lt cache",
+ RTE_PER_LTHREAD_SECTION_SIZE,
+ LTHREAD_PREALLOC);
+ if (new_sched->per_lthread_cache == NULL)
+ break;
+
+ /* Initialize per scheduler local free tls cache */
+ alloc_status = SCHED_ALLOC_TLS_CACHE;
+ new_sched->tls_cache =
+ _lthread_objcache_create("TLS cache",
+ sizeof(struct lthread_tls),
+ LTHREAD_PREALLOC);
+ if (new_sched->tls_cache == NULL)
+ break;
+
+ /* Initialize per scheduler local free cond var cache */
+ alloc_status = SCHED_ALLOC_COND_CACHE;
+ new_sched->cond_cache =
+ _lthread_objcache_create("cond cache",
+ sizeof(struct lthread_cond),
+ LTHREAD_PREALLOC);
+ if (new_sched->cond_cache == NULL)
+ break;
+
+ /* Initialize per scheduler local free mutex cache */
+ alloc_status = SCHED_ALLOC_MUTEX_CACHE;
+ new_sched->mutex_cache =
+ _lthread_objcache_create("mutex cache",
+ sizeof(struct lthread_mutex),
+ LTHREAD_PREALLOC);
+ if (new_sched->mutex_cache == NULL)
+ break;
+
+ alloc_status = SCHED_ALLOC_OK;
+ } while (0);
+
+ /* roll back on any failure */
+ switch (alloc_status) {
+ case SCHED_ALLOC_MUTEX_CACHE:
+ _lthread_objcache_destroy(new_sched->cond_cache);
+ /* fall through */
+ case SCHED_ALLOC_COND_CACHE:
+ _lthread_objcache_destroy(new_sched->tls_cache);
+ /* fall through */
+ case SCHED_ALLOC_TLS_CACHE:
+ _lthread_objcache_destroy(new_sched->per_lthread_cache);
+ /* fall through */
+ case SCHED_ALLOC_PERLT_CACHE:
+ _lthread_objcache_destroy(new_sched->stack_cache);
+ /* fall through */
+ case SCHED_ALLOC_STACK_CACHE:
+ _lthread_objcache_destroy(new_sched->lthread_cache);
+ /* fall through */
+ case SCHED_ALLOC_LTHREAD_CACHE:
+ _lthread_queue_destroy(new_sched->pready);
+ /* fall through */
+ case SCHED_ALLOC_PREADY_QUEUE:
+ _lthread_queue_destroy(new_sched->ready);
+ /* fall through */
+ case SCHED_ALLOC_READY_QUEUE:
+ _qnode_pool_destroy(new_sched->qnode_pool);
+ /* fall through */
+ case SCHED_ALLOC_QNODE_POOL:
+ /* fall through */
+ case SCHED_ALLOC_OK:
+ break;
+ }
+ return alloc_status;
+}
+
+
+/*
+ * Create a scheduler on the current lcore
+ */
+struct lthread_sched *_lthread_sched_create(size_t stack_size)
+{
+ int status;
+ struct lthread_sched *new_sched;
+ unsigned lcoreid = rte_lcore_id();
+
+ LTHREAD_ASSERT(stack_size <= LTHREAD_MAX_STACK_SIZE);
+
+ if (stack_size == 0)
+ stack_size = LTHREAD_MAX_STACK_SIZE;
+
+ new_sched =
+ rte_calloc_socket(NULL, 1, sizeof(struct lthread_sched),
+ RTE_CACHE_LINE_SIZE,
+ rte_socket_id());
+ if (new_sched == NULL) {
+ RTE_LOG(CRIT, LTHREAD,
+ "Failed to allocate memory for scheduler\n");
+ return NULL;
+ }
+
+ _lthread_key_pool_init();
+
+ new_sched->stack_size = stack_size;
+ new_sched->birth = rte_rdtsc();
+ THIS_SCHED = new_sched;
+
+ status = _lthread_sched_alloc_resources(new_sched);
+ if (status != SCHED_ALLOC_OK) {
+ RTE_LOG(CRIT, LTHREAD,
+ "Failed to allocate resources for scheduler code = %d\n",
+ status);
+ rte_free(new_sched);
+ return NULL;
+ }
+
+ bzero(&new_sched->ctx, sizeof(struct ctx));
+
+ new_sched->lcore_id = lcoreid;
+
+ schedcore[lcoreid] = new_sched;
+
+ new_sched->run_flag = 1;
+
+ DIAG_EVENT(new_sched, LT_DIAG_SCHED_CREATE, rte_lcore_id(), 0);
+
+ rte_wmb();
+ return new_sched;
+}
+
+/*
+ * Set the number of schedulers in the system
+ */
+int lthread_num_schedulers_set(int num)
+{
+ rte_atomic16_set(&num_schedulers, num);
+ return (int)rte_atomic16_read(&num_schedulers);
+}
+
+/*
+ * Return the number of schedulers active
+ */
+int lthread_active_schedulers(void)
+{
+ return (int)rte_atomic16_read(&active_schedulers);
+}
+
+
+/**
+ * shutdown the scheduler running on the specified lcore
+ */
+void lthread_scheduler_shutdown(unsigned lcoreid)
+{
+ uint64_t coreid = (uint64_t) lcoreid;
+
+ if (coreid < LTHREAD_MAX_LCORES) {
+ if (schedcore[coreid] != NULL)
+ schedcore[coreid]->run_flag = 0;
+ }
+}
+
+/**
+ * shutdown all schedulers
+ */
+void lthread_scheduler_shutdown_all(void)
+{
+ uint64_t i;
+
+ /*
+ * give time for all schedulers to have started
+ * Note we use sched_yield() rather than pthread_yield() to allow
+ * for the possibility of a pthread wrapper on lthread_yield(),
+ * something that is not possible unless the scheduler is running.
+ */
+ while (rte_atomic16_read(&active_schedulers) <
+ rte_atomic16_read(&num_schedulers))
+ sched_yield();
+
+ for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
+ if (schedcore[i] != NULL)
+ schedcore[i]->run_flag = 0;
+ }
+}
+
+/*
+ * Resume a suspended lthread
+ */
+static inline void
+_lthread_resume(struct lthread *lt) __attribute__ ((always_inline));
+static inline void _lthread_resume(struct lthread *lt)
+{
+ struct lthread_sched *sched = THIS_SCHED;
+ struct lthread_stack *s;
+ uint64_t state = lt->state;
+#if LTHREAD_DIAG
+ int init = 0;
+#endif
+
+ sched->current_lthread = lt;
+
+ if (state & (BIT(ST_LT_CANCELLED) | BIT(ST_LT_EXITED))) {
+ /* if detached we can free the thread now */
+ if (state & BIT(ST_LT_DETACH)) {
+ _lthread_free(lt);
+ sched->current_lthread = NULL;
+ return;
+ }
+ }
+
+ if (state & BIT(ST_LT_INIT)) {
+ /* first time this thread has been run */
+ /* assign thread to this scheduler */
+ lt->sched = THIS_SCHED;
+
+ /* allocate stack */
+ s = _stack_alloc();
+
+ lt->stack_container = s;
+ _lthread_set_stack(lt, s->stack, s->stack_size);
+
+ /* allocate memory for TLS used by this thread */
+ _lthread_tls_alloc(lt);
+
+ lt->state = BIT(ST_LT_READY);
+#if LTHREAD_DIAG
+ init = 1;
+#endif
+ }
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESUMED, init, lt);
+
+ /* switch to the new thread */
+ ctx_switch(&lt->ctx, &sched->ctx);
+
+ /* If posting to a queue that could be read by another lcore
+ * we defer the queue write till now to ensure the context has been
+ * saved before the other core tries to resume it.
+ * This applies to blocking on mutex, cond, and to set_affinity.
+ */
+ if (lt->pending_wr_queue != NULL) {
+ struct lthread_queue *dest = lt->pending_wr_queue;
+
+ lt->pending_wr_queue = NULL;
+
+ /* queue the current thread to the specified queue */
+ _lthread_queue_insert_mp(dest, lt);
+ }
+
+ sched->current_lthread = NULL;
+}
+
+/*
+ * Handle sleep timer expiry
+ */
+void
+_sched_timer_cb(struct rte_timer *tim, void *arg)
+{
+ struct lthread *lt = (struct lthread *) arg;
+ uint64_t state = lt->state;
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_EXPIRED, &lt->tim, 0);
+
+ rte_timer_stop(tim);
+
+ if (lt->state & BIT(ST_LT_CANCELLED))
+ (THIS_SCHED)->nb_blocked_threads--;
+
+ lt->state = state | BIT(ST_LT_EXPIRED);
+ _lthread_resume(lt);
+ lt->state = state & CLEARBIT(ST_LT_EXPIRED);
+}
+
+
+
+/*
+ * Returns 0 if there is a pending job in the scheduler or 1 if done and can exit.
+ */
+static inline int _lthread_sched_isdone(struct lthread_sched *sched)
+{
+ return (sched->run_flag == 0) &&
+ (_lthread_queue_empty(sched->ready)) &&
+ (_lthread_queue_empty(sched->pready)) &&
+ (sched->nb_blocked_threads == 0);
+}
+
+/*
+ * Wait for all schedulers to start
+ */
+static inline void _lthread_schedulers_sync_start(void)
+{
+ rte_atomic16_inc(&active_schedulers);
+
+ /* wait for lthread schedulers
+ * Note we use sched_yield() rather than pthread_yield() to allow
+ * for the possibility of a pthread wrapper on lthread_yield(),
+ * something that is not possible unless the scheduler is running.
+ */
+ while (rte_atomic16_read(&active_schedulers) <
+ rte_atomic16_read(&num_schedulers))
+ sched_yield();
+
+}
+
+/*
+ * Wait for all schedulers to stop
+ */
+static inline void _lthread_schedulers_sync_stop(void)
+{
+ rte_atomic16_dec(&active_schedulers);
+ rte_atomic16_dec(&num_schedulers);
+
+ /* wait for schedulers
+ * Note we use sched_yield() rather than pthread_yield() to allow
+ * for the possibility of a pthread wrapper on lthread_yield(),
+ * something that is not possible unless the scheduler is running.
+ */
+ while (rte_atomic16_read(&active_schedulers) > 0)
+ sched_yield();
+
+}
+
+
+/*
+ * Run the lthread scheduler
+ * This loop is the heart of the system
+ */
+void lthread_run(void)
+{
+
+ struct lthread_sched *sched = THIS_SCHED;
+ struct lthread *lt = NULL;
+
+ RTE_LOG(INFO, LTHREAD,
+ "starting scheduler %p on lcore %u phys core %u\n",
+ sched, rte_lcore_id(),
+ rte_lcore_index(rte_lcore_id()));
+
+ /* if more than one, wait for all schedulers to start */
+ _lthread_schedulers_sync_start();
+
+
+ /*
+ * This is the main scheduling loop.
+ * So long as there are tasks in existence we run this loop.
+ * We check for:
+ * expired timers,
+ * the local ready queue,
+ * and the peer ready queue,
+ *
+ * and resume lthreads ad infinitum.
+ */
+ while (!_lthread_sched_isdone(sched)) {
+
+ rte_timer_manage();
+
+ lt = _lthread_queue_poll(sched->ready);
+ if (lt != NULL)
+ _lthread_resume(lt);
+ lt = _lthread_queue_poll(sched->pready);
+ if (lt != NULL)
+ _lthread_resume(lt);
+ }
+
+
+ /* if more than one, wait for all schedulers to stop */
+ _lthread_schedulers_sync_stop();
+
+ (THIS_SCHED) = NULL;
+
+ RTE_LOG(INFO, LTHREAD,
+ "stopping scheduler %p on lcore %u phys core %u\n",
+ sched, rte_lcore_id(),
+ rte_lcore_index(rte_lcore_id()));
+ fflush(stdout);
+}
+
+/*
+ * Return the scheduler for this lcore
+ *
+ */
+struct lthread_sched *_lthread_sched_get(int lcore_id)
+{
+ if (lcore_id > LTHREAD_MAX_LCORES)
+ return NULL;
+ return schedcore[lcore_id];
+}
+
+/*
+ * migrate the current thread to another scheduler running
+ * on the specified lcore.
+ */
+int lthread_set_affinity(unsigned lcoreid)
+{
+ struct lthread *lt = THIS_LTHREAD;
+ struct lthread_sched *dest_sched;
+
+ if (unlikely(lcoreid > LTHREAD_MAX_LCORES))
+ return POSIX_ERRNO(EINVAL);
+
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_AFFINITY, lcoreid, 0);
+
+ dest_sched = schedcore[lcoreid];
+
+ if (unlikely(dest_sched == NULL))
+ return POSIX_ERRNO(EINVAL);
+
+ if (likely(dest_sched != THIS_SCHED)) {
+ lt->sched = dest_sched;
+ lt->pending_wr_queue = dest_sched->pready;
+ _affinitize();
+ return 0;
+ }
+ return 0;
+}
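+
+/*
+ * Usage sketch (illustrative only, not part of the upstream sources;
+ * do_rx_stage()/do_tx_stage() are placeholders). An lthread may migrate
+ * itself to the scheduler of another lcore, for example to hand itself
+ * from an rx lcore to a tx lcore:
+ */
+#if 0 /* example only, compiled out */
+static void pipeline_lthread(void *arg __rte_unused)
+{
+ /* runs on the scheduler that created it */
+ do_rx_stage();
+
+ /* migrate to the scheduler running on lcore 3 for the tx stage */
+ if (lthread_set_affinity(3) == 0)
+ do_tx_stage(); /* now executing on lcore 3 */
+}
+#endif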
diff --git a/examples/performance-thread/common/lthread_sched.h b/examples/performance-thread/common/lthread_sched.h
new file mode 100644
index 00000000..4ce56c27
--- /dev/null
+++ b/examples/performance-thread/common/lthread_sched.h
@@ -0,0 +1,152 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Some portions of this software are derived from
+ * https://github.com/halayli/lthread, which carries the following license.
+ *
+ * Copyright (C) 2012, Hasan Alayli <halayli@gmail.com>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_SCHED_H_
+#define LTHREAD_SCHED_H_
+
+#include "lthread_int.h"
+#include "lthread_queue.h"
+#include "lthread_objcache.h"
+#include "lthread_diag.h"
+#include "ctx.h"
+
+/*
+ * Insert an lthread into a scheduler's ready queue: the single-producer
+ * insert is used when the target is the local scheduler, the
+ * multi-producer insert when targeting a peer scheduler's pready queue.
+ */
+static inline void
+_ready_queue_insert(struct lthread_sched *sched, struct lthread *lt)
+{
+ if (sched == THIS_SCHED)
+ _lthread_queue_insert_sp((THIS_SCHED)->ready, lt);
+ else
+ _lthread_queue_insert_mp(sched->pready, lt);
+}
+
+/*
+ * remove an lthread from a queue
+ */
+static inline struct lthread *_ready_queue_remove(struct lthread_queue *q)
+{
+ return _lthread_queue_remove(q);
+}
+
+/**
+ * Return true if the ready queue is empty
+ */
+static inline int _ready_queue_empty(struct lthread_queue *q)
+{
+ return _lthread_queue_empty(q);
+}
+
+static inline uint64_t _sched_now(void)
+{
+ uint64_t now = rte_rdtsc();
+
+ if (now > (THIS_SCHED)->birth)
+ return now - (THIS_SCHED)->birth;
+ if (now < (THIS_SCHED)->birth)
+ return (THIS_SCHED)->birth - now;
+ /* never return 0 because this means sleep forever */
+ return 1;
+}
+
+static inline void
+_affinitize(void) __attribute__ ((always_inline));
+static inline void
+_affinitize(void)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_SUSPENDED, 0, 0);
+ ctx_switch(&(THIS_SCHED)->ctx, &lt->ctx);
+}
+
+static inline void
+_suspend(void) __attribute__ ((always_inline));
+static inline void
+_suspend(void)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ (THIS_SCHED)->nb_blocked_threads++;
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_SUSPENDED, 0, 0);
+ ctx_switch(&(THIS_SCHED)->ctx, &lt->ctx);
+ (THIS_SCHED)->nb_blocked_threads--;
+}
+
+static inline void
+_reschedule(void) __attribute__ ((always_inline));
+static inline void
+_reschedule(void)
+{
+ struct lthread *lt = THIS_LTHREAD;
+
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_RESCHEDULED, 0, 0);
+ _ready_queue_insert(THIS_SCHED, lt);
+ ctx_switch(&(THIS_SCHED)->ctx, &lt->ctx);
+}
+
+extern struct lthread_sched *schedcore[];
+void _sched_timer_cb(struct rte_timer *tim, void *arg);
+void _sched_shutdown(__rte_unused void *arg);
+
+
+#endif /* LTHREAD_SCHED_H_ */
diff --git a/examples/performance-thread/common/lthread_timer.h b/examples/performance-thread/common/lthread_timer.h
new file mode 100644
index 00000000..b5e6fb0e
--- /dev/null
+++ b/examples/performance-thread/common/lthread_timer.h
@@ -0,0 +1,79 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#ifndef LTHREAD_TIMER_H_
+#define LTHREAD_TIMER_H_
+
+#include "lthread_int.h"
+#include "lthread_sched.h"
+
+
+static inline uint64_t
+_ns_to_clks(uint64_t ns)
+{
+ unsigned __int128 clkns = rte_get_tsc_hz();
+
+ clkns *= ns;
+ clkns /= 1000000000;
+ return (uint64_t) clkns;
+}
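+
+/*
+ * Example: with a 2.5 GHz TSC, rte_get_tsc_hz() returns 2500000000, so a
+ * 1 ms sleep (1000000 ns) converts to 2500000000 * 1000000 / 1000000000
+ * = 2500000 clks. The 128-bit intermediate avoids overflowing hz * ns for
+ * long sleeps.
+ */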
+
+
+static inline void
+_timer_start(struct lthread *lt, uint64_t clks)
+{
+ if (clks > 0) {
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_START, &lt->tim, clks);
+ rte_timer_init(&lt->tim);
+ rte_timer_reset(&lt->tim,
+ clks,
+ SINGLE,
+ rte_lcore_id(),
+ _sched_timer_cb,
+ (void *)lt);
+ }
+}
+
+
+static inline void
+_timer_stop(struct lthread *lt)
+{
+ if (lt != NULL) {
+ DIAG_EVENT(lt, LT_DIAG_LTHREAD_TMR_DELETE, &lt->tim, 0);
+ rte_timer_stop(&lt->tim);
+ }
+}
+
+
+#endif /* LTHREAD_TIMER_H_ */
diff --git a/examples/performance-thread/common/lthread_tls.c b/examples/performance-thread/common/lthread_tls.c
new file mode 100644
index 00000000..43cda4ff
--- /dev/null
+++ b/examples/performance-thread/common/lthread_tls.c
@@ -0,0 +1,253 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <limits.h>
+#include <inttypes.h>
+#include <unistd.h>
+#include <pthread.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <execinfo.h>
+#include <sched.h>
+
+#include <rte_malloc.h>
+#include <rte_log.h>
+#include <rte_ring.h>
+#include <rte_atomic_64.h>
+
+#include "lthread_tls.h"
+#include "lthread_queue.h"
+#include "lthread_objcache.h"
+#include "lthread_sched.h"
+
+static struct rte_ring *key_pool;
+static uint64_t key_pool_init;
+
+/* needed to cause section start and end to be defined */
+RTE_DEFINE_PER_LTHREAD(void *, dummy);
+
+static struct lthread_key key_table[LTHREAD_MAX_KEYS];
+
+void lthread_tls_ctor(void) __attribute__((constructor));
+
+void lthread_tls_ctor(void)
+{
+ key_pool = NULL;
+ key_pool_init = 0;
+}
+
+/*
+ * Initialize a pool of keys
+ * These are unique tokens that can be obtained by threads
+ * calling lthread_key_create()
+ */
+void _lthread_key_pool_init(void)
+{
+ static struct rte_ring *pool;
+ struct lthread_key *new_key;
+ char name[MAX_LTHREAD_NAME_SIZE];
+
+ bzero(key_table, sizeof(key_table));
+
+ /* only one lcore should do this */
+ if (rte_atomic64_cmpset(&key_pool_init, 0, 1)) {
+
+ snprintf(name,
+ MAX_LTHREAD_NAME_SIZE,
+ "lthread_key_pool_%d",
+ getpid());
+
+ pool = rte_ring_create(name,
+ LTHREAD_MAX_KEYS, 0, 0);
+ LTHREAD_ASSERT(pool);
+
+ int i;
+
+ for (i = 1; i < LTHREAD_MAX_KEYS; i++) {
+ new_key = &key_table[i];
+ rte_ring_mp_enqueue((struct rte_ring *)pool,
+ (void *)new_key);
+ }
+ key_pool = pool;
+ }
+ /* other lcores wait here till done */
+ while (key_pool == NULL) {
+ rte_compiler_barrier();
+ sched_yield();
+ };
+}
+
+/*
+ * Create a key
+ * This means getting a key from the pool
+ */
+int lthread_key_create(unsigned int *key, tls_destructor_func destructor)
+{
+ if (key == NULL)
+ return POSIX_ERRNO(EINVAL);
+
+ struct lthread_key *new_key;
+
+ if (rte_ring_mc_dequeue((struct rte_ring *)key_pool, (void **)&new_key)
+ == 0) {
+ new_key->destructor = destructor;
+ *key = (new_key - key_table);
+
+ return 0;
+ }
+ return POSIX_ERRNO(EAGAIN);
+}
+
+
+/*
+ * Delete a key
+ */
+int lthread_key_delete(unsigned int k)
+{
+ struct lthread_key *key;
+
+ if (k > LTHREAD_MAX_KEYS)
+ return POSIX_ERRNO(EINVAL);
+
+ key = (struct lthread_key *) &key_table[k];
+
+ key->destructor = NULL;
+ rte_ring_mp_enqueue((struct rte_ring *)key_pool,
+ (void *)key);
+ return 0;
+}
+
+
+
+/*
+ * Break the association for all keys in use by this thread and
+ * invoke the destructor if one is available.
+ * Since a destructor can create keys we could enter an infinite loop,
+ * therefore we give up after LTHREAD_DESTRUCTOR_ITERATIONS.
+ * The behavior is modelled on pthread.
+ */
+void _lthread_tls_destroy(struct lthread *lt)
+{
+ int i, k;
+ int nb_keys;
+ void *data;
+
+ for (i = 0; i < LTHREAD_DESTRUCTOR_ITERATIONS; i++) {
+
+ for (k = 1; k < LTHREAD_MAX_KEYS; k++) {
+
+ /* no keys in use ? */
+ nb_keys = lt->tls->nb_keys_inuse;
+ if (nb_keys == 0)
+ return;
+
+ /* this key not in use ? */
+ if (lt->tls->data[k] == NULL)
+ continue;
+
+ /* remove this key */
+ data = lt->tls->data[k];
+ lt->tls->data[k] = NULL;
+ lt->tls->nb_keys_inuse = nb_keys-1;
+
+ /* invoke destructor */
+ if (key_table[k].destructor != NULL)
+ key_table[k].destructor(data);
+ }
+ }
+}
+
+/*
+ * Return the pointer associated with a key
+ * If the key is no longer valid return NULL
+ */
+void
+*lthread_getspecific(unsigned int k)
+{
+
+ if (k > LTHREAD_MAX_KEYS)
+ return NULL;
+
+ return THIS_LTHREAD->tls->data[k];
+}
+
+/*
+ * Set a value against a key
+ * If the key is no longer valid return an error
+ * instead of storing the value
+ */
+int lthread_setspecific(unsigned int k, const void *data)
+{
+ if (k > LTHREAD_MAX_KEYS)
+ return POSIX_ERRNO(EINVAL);
+
+ int n = THIS_LTHREAD->tls->nb_keys_inuse;
+
+ /* discard const qualifier */
+ char *p = (char *) (uintptr_t) data;
+
+
+ if (data != NULL) {
+ if (THIS_LTHREAD->tls->data[k] == NULL)
+ THIS_LTHREAD->tls->nb_keys_inuse = n+1;
+ }
+
+ THIS_LTHREAD->tls->data[k] = (void *) p;
+ return 0;
+}
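+
+/*
+ * Usage sketch (illustrative only, not part of the upstream sources;
+ * struct conn is a placeholder type). Per-lthread data via the key API;
+ * destructors registered with lthread_key_create() are run through
+ * _lthread_tls_destroy() above when the thread's TLS is torn down:
+ */
+#if 0 /* example only, compiled out */
+static unsigned int conn_key;
+
+static void conn_free(void *p)
+{
+ free(p); /* called at teardown for each non-NULL value */
+}
+
+static void session_lthread(void *arg __rte_unused)
+{
+ struct conn *c = calloc(1, sizeof(*c));
+
+ lthread_setspecific(conn_key, c);
+ /* ... later, anywhere in this lthread ... */
+ c = lthread_getspecific(conn_key);
+}
+
+/* once, before spawning session lthreads:
+ * lthread_key_create(&conn_key, conn_free);
+ */
+#endif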
+
+/*
+ * Allocate data for TLS cache
+ */
+void _lthread_tls_alloc(struct lthread *lt)
+{
+ struct lthread_tls *tls;
+
+ tls = _lthread_objcache_alloc((THIS_SCHED)->tls_cache);
+
+ LTHREAD_ASSERT(tls != NULL);
+
+ tls->root_sched = (THIS_SCHED);
+ lt->tls = tls;
+
+ /* allocate data for TLS variables using RTE_PER_LTHREAD macros */
+ if (sizeof(void *) < (uint64_t)RTE_PER_LTHREAD_SECTION_SIZE) {
+ lt->per_lthread_data =
+ _lthread_objcache_alloc((THIS_SCHED)->per_lthread_cache);
+ }
+}
diff --git a/examples/performance-thread/common/lthread_tls.h b/examples/performance-thread/common/lthread_tls.h
new file mode 100644
index 00000000..86cbfadc
--- /dev/null
+++ b/examples/performance-thread/common/lthread_tls.h
@@ -0,0 +1,57 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef LTHREAD_TLS_H_
+#define LTHREAD_TLS_H_
+
+#include "lthread_api.h"
+
+#define RTE_PER_LTHREAD_SECTION_SIZE \
+(&__stop_per_lt - &__start_per_lt)
+
+struct lthread_key {
+ tls_destructor_func destructor;
+};
+
+struct lthread_tls {
+ void *data[LTHREAD_MAX_KEYS];
+ int nb_keys_inuse;
+ struct lthread_sched *root_sched;
+};
+
+void _lthread_tls_destroy(struct lthread *lt);
+void _lthread_key_pool_init(void);
+void _lthread_tls_alloc(struct lthread *lt);
+
+
+#endif /* LTHREAD_TLS_H_ */
diff --git a/examples/performance-thread/l3fwd-thread/Makefile b/examples/performance-thread/l3fwd-thread/Makefile
new file mode 100644
index 00000000..d8fe5e68
--- /dev/null
+++ b/examples/performance-thread/l3fwd-thread/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = l3fwd-thread
+
+# all source files are stored in SRCS-y
+SRCS-y := main.c
+
+include $(RTE_SDK)/examples/performance-thread/common/common.mk
+
+CFLAGS += -O3 -g $(USER_FLAGS) $(INCLUDES) $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+#ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+#endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/performance-thread/l3fwd-thread/main.c b/examples/performance-thread/l3fwd-thread/main.c
new file mode 100644
index 00000000..15c0a4de
--- /dev/null
+++ b/examples/performance-thread/l3fwd-thread/main.c
@@ -0,0 +1,3651 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_vect.h>
+#include <rte_byteorder.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+#include <rte_udp.h>
+#include <rte_string_fns.h>
+
+#include <cmdline_parse.h>
+#include <cmdline_parse_etheraddr.h>
+
+#include <lthread_api.h>
+
+#define APP_LOOKUP_EXACT_MATCH 0
+#define APP_LOOKUP_LPM 1
+#define DO_RFC_1812_CHECKS
+
+/* Enable cpu-load stats 0-off, 1-on */
+#define APP_CPU_LOAD 1
+
+#ifndef APP_LOOKUP_METHOD
+#define APP_LOOKUP_METHOD APP_LOOKUP_LPM
+#endif
+
+/*
+ * When set to zero, simple forwarding path is enabled.
+ * When set to one, optimized forwarding path is enabled.
+ * Note that LPM optimisation path uses SSE4.1 instructions.
+ */
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && !defined(__SSE4_1__))
+#define ENABLE_MULTI_BUFFER_OPTIMIZE 0
+#else
+#define ENABLE_MULTI_BUFFER_OPTIMIZE 1
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+#include <rte_hash.h>
+#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+#include <rte_lpm.h>
+#include <rte_lpm6.h>
+#else
+#error "APP_LOOKUP_METHOD set to incorrect value"
+#endif
+
+#define RTE_LOGTYPE_L3FWD RTE_LOGTYPE_USER1
+
+#define MAX_JUMBO_PKT_LEN 9600
+
+#define IPV6_ADDR_LEN 16
+
+#define MEMPOOL_CACHE_SIZE 256
+
+/*
+ * This expression is used to calculate the number of mbufs needed depending on
+ * user input, taking into account memory for rx and tx hardware rings, cache
+ * per lcore and mtable per port per lcore. RTE_MAX is used to ensure that
+ * NB_MBUF never goes below a minimum value of 8192
+ */
+
+#define NB_MBUF RTE_MAX(\
+ (nb_ports*nb_rx_queue*RTE_TEST_RX_DESC_DEFAULT + \
+ nb_ports*nb_lcores*MAX_PKT_BURST + \
+ nb_ports*n_tx_queue*RTE_TEST_TX_DESC_DEFAULT + \
+ nb_lcores*MEMPOOL_CACHE_SIZE), \
+ (unsigned)8192)
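+
+/*
+ * Worked example: with nb_ports = 2, nb_rx_queue = 2, n_tx_queue = 2 and
+ * nb_lcores = 4, using the defaults below (128 descriptors, bursts of 32,
+ * cache of 256), the sum is 2*2*128 + 2*4*32 + 2*2*128 + 4*256 = 2304,
+ * so RTE_MAX raises NB_MBUF to the 8192 floor.
+ */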
+
+#define MAX_PKT_BURST 32
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/*
+ * Try to avoid TX buffering if we have at least MAX_TX_BURST packets to send.
+ */
+#define MAX_TX_BURST (MAX_PKT_BURST / 2)
+#define BURST_SIZE MAX_TX_BURST
+
+#define NB_SOCKETS 8
+
+/* Configure how many packets ahead to prefetch when reading packets */
+#define PREFETCH_OFFSET 3
+
+/* Used to mark destination port as 'invalid'. */
+#define BAD_PORT ((uint16_t)-1)
+
+#define FWDSTEP 4
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 128
+static uint16_t nb_rxd = RTE_TEST_RX_DESC_DEFAULT;
+static uint16_t nb_txd = RTE_TEST_TX_DESC_DEFAULT;
+
+/* ethernet addresses of ports */
+static uint64_t dest_eth_addr[RTE_MAX_ETHPORTS];
+static struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+static __m128i val_eth[RTE_MAX_ETHPORTS];
+
+/* replace first 12B of the ethernet header. */
+#define MASK_ETH 0x3f
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+static int promiscuous_on; /**< Set in promiscuous mode off by default. */
+static int numa_on = 1; /**< NUMA is enabled by default. */
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static int ipv6; /**< ipv6 is false by default. */
+#endif
+
+#if (APP_CPU_LOAD == 1)
+
+#define MAX_CPU RTE_MAX_LCORE
+#define CPU_LOAD_TIMEOUT_US (5 * 1000 * 1000) /**< Timeout for collecting stats, 5 s */
+
+#define CPU_PROCESS 0
+#define CPU_POLL 1
+#define MAX_CPU_COUNTER 2
+
+struct cpu_load {
+ uint16_t n_cpu;
+ uint64_t counter;
+ uint64_t hits[MAX_CPU_COUNTER][MAX_CPU];
+} __rte_cache_aligned;
+
+static struct cpu_load cpu_load;
+static int cpu_load_lcore_id = -1;
+
+#define SET_CPU_BUSY(thread, counter) \
+ thread->conf.busy[counter] = 1
+
+#define SET_CPU_IDLE(thread, counter) \
+ thread->conf.busy[counter] = 0
+
+#define IS_CPU_BUSY(thread, counter) \
+ (thread->conf.busy[counter] > 0)
+
+#else
+
+#define SET_CPU_BUSY(thread, counter)
+#define SET_CPU_IDLE(thread, counter)
+#define IS_CPU_BUSY(thread, counter) 0
+
+#endif
+
+struct mbuf_table {
+ uint16_t len;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+struct lcore_rx_queue {
+ uint8_t port_id;
+ uint8_t queue_id;
+} __rte_cache_aligned;
+
+#define MAX_RX_QUEUE_PER_LCORE 16
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_LCORE_PARAMS 1024
+struct rx_thread_params {
+ uint8_t port_id;
+ uint8_t queue_id;
+ uint8_t lcore_id;
+ uint8_t thread_id;
+} __rte_cache_aligned;
+
+static struct rx_thread_params rx_thread_params_array[MAX_LCORE_PARAMS];
+static struct rx_thread_params rx_thread_params_array_default[] = {
+ {0, 0, 2, 0},
+ {0, 1, 2, 1},
+ {0, 2, 2, 2},
+ {1, 0, 2, 3},
+ {1, 1, 2, 4},
+ {1, 2, 2, 5},
+ {2, 0, 2, 6},
+ {3, 0, 3, 7},
+ {3, 1, 3, 8},
+};
+
+static struct rx_thread_params *rx_thread_params =
+ rx_thread_params_array_default;
+static uint16_t nb_rx_thread_params = RTE_DIM(rx_thread_params_array_default);
+
+struct tx_thread_params {
+ uint8_t lcore_id;
+ uint8_t thread_id;
+} __rte_cache_aligned;
+
+static struct tx_thread_params tx_thread_params_array[MAX_LCORE_PARAMS];
+static struct tx_thread_params tx_thread_params_array_default[] = {
+ {4, 0},
+ {5, 1},
+ {6, 2},
+ {7, 3},
+ {8, 4},
+ {9, 5},
+ {10, 6},
+ {11, 7},
+ {12, 8},
+};
+
+static struct tx_thread_params *tx_thread_params =
+ tx_thread_params_array_default;
+static uint16_t nb_tx_thread_params = RTE_DIM(tx_thread_params_array_default);
+
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 1, /**< IP checksum offload enabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_TCP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+static struct rte_mempool *pktmbuf_pool[NB_SOCKETS];
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+#include <rte_hash_crc.h>
+#define DEFAULT_HASH_FUNC rte_hash_crc
+#else
+#include <rte_jhash.h>
+#define DEFAULT_HASH_FUNC rte_jhash
+#endif
+
+struct ipv4_5tuple {
+ uint32_t ip_dst;
+ uint32_t ip_src;
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+union ipv4_5tuple_host {
+ struct {
+ uint8_t pad0;
+ uint8_t proto;
+ uint16_t pad1;
+ uint32_t ip_src;
+ uint32_t ip_dst;
+ uint16_t port_src;
+ uint16_t port_dst;
+ };
+ __m128i xmm;
+};
+
+#define XMM_NUM_IN_IPV6_5TUPLE 3
+
+struct ipv6_5tuple {
+ uint8_t ip_dst[IPV6_ADDR_LEN];
+ uint8_t ip_src[IPV6_ADDR_LEN];
+ uint16_t port_dst;
+ uint16_t port_src;
+ uint8_t proto;
+} __attribute__((__packed__));
+
+union ipv6_5tuple_host {
+ struct {
+ uint16_t pad0;
+ uint8_t proto;
+ uint8_t pad1;
+ uint8_t ip_src[IPV6_ADDR_LEN];
+ uint8_t ip_dst[IPV6_ADDR_LEN];
+ uint16_t port_src;
+ uint16_t port_dst;
+ uint64_t reserve;
+ };
+ __m128i xmm[XMM_NUM_IN_IPV6_5TUPLE];
+};
+
+struct ipv4_l3fwd_route {
+ struct ipv4_5tuple key;
+ uint8_t if_out;
+};
+
+struct ipv6_l3fwd_route {
+ struct ipv6_5tuple key;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
+ {{IPv4(101, 0, 0, 0), IPv4(100, 10, 0, 1), 101, 11, IPPROTO_TCP}, 0},
+ {{IPv4(201, 0, 0, 0), IPv4(200, 20, 0, 1), 102, 12, IPPROTO_TCP}, 1},
+ {{IPv4(111, 0, 0, 0), IPv4(100, 30, 0, 1), 101, 11, IPPROTO_TCP}, 2},
+ {{IPv4(211, 0, 0, 0), IPv4(200, 40, 0, 1), 102, 12, IPPROTO_TCP}, 3},
+};
+
+static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
+ {{
+ {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0x80, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
+ 0x05},
+ 101, 11, IPPROTO_TCP}, 0},
+
+ {{
+ {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0x90, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
+ 0x05},
+ 102, 12, IPPROTO_TCP}, 1},
+
+ {{
+ {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0xa0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
+ 0x05},
+ 101, 11, IPPROTO_TCP}, 2},
+
+ {{
+ {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1e, 0x67, 0xff, 0xfe, 0, 0, 0},
+ {0xfe, 0xb0, 0, 0, 0, 0, 0, 0, 0x02, 0x1b, 0x21, 0xff, 0xfe, 0x91, 0x38,
+ 0x05},
+ 102, 12, IPPROTO_TCP}, 3},
+};
+
+typedef struct rte_hash lookup_struct_t;
+static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
+static lookup_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
+
+#ifdef RTE_ARCH_X86_64
+/* default to 4 million hash entries (approx) */
+#define L3FWD_HASH_ENTRIES (1024*1024*4)
+#else
+/* 32-bit has less address-space for hugepage memory, limit to 1M entries */
+#define L3FWD_HASH_ENTRIES (1024*1024*1)
+#endif
+#define HASH_ENTRY_NUMBER_DEFAULT 4
+
+static uint32_t hash_entry_number = HASH_ENTRY_NUMBER_DEFAULT;
+
+static inline uint32_t
+ipv4_hash_crc(const void *data, __rte_unused uint32_t data_len,
+ uint32_t init_val)
+{
+ const union ipv4_5tuple_host *k;
+ uint32_t t;
+ const uint32_t *p;
+
+ k = data;
+ t = k->proto;
+ p = (const uint32_t *)&k->port_src;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ init_val = rte_hash_crc_4byte(t, init_val);
+ init_val = rte_hash_crc_4byte(k->ip_src, init_val);
+ init_val = rte_hash_crc_4byte(k->ip_dst, init_val);
+ init_val = rte_hash_crc_4byte(*p, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ init_val = rte_jhash_1word(t, init_val);
+ init_val = rte_jhash_1word(k->ip_src, init_val);
+ init_val = rte_jhash_1word(k->ip_dst, init_val);
+ init_val = rte_jhash_1word(*p, init_val);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ return init_val;
+}
+
+static inline uint32_t
+ipv6_hash_crc(const void *data, __rte_unused uint32_t data_len,
+ uint32_t init_val)
+{
+ const union ipv6_5tuple_host *k;
+ uint32_t t;
+ const uint32_t *p;
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ const uint32_t *ip_src0, *ip_src1, *ip_src2, *ip_src3;
+ const uint32_t *ip_dst0, *ip_dst1, *ip_dst2, *ip_dst3;
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+
+ k = data;
+ t = k->proto;
+ p = (const uint32_t *)&k->port_src;
+
+#ifdef RTE_MACHINE_CPUFLAG_SSE4_2
+ ip_src0 = (const uint32_t *) k->ip_src;
+ ip_src1 = (const uint32_t *)(k->ip_src + 4);
+ ip_src2 = (const uint32_t *)(k->ip_src + 8);
+ ip_src3 = (const uint32_t *)(k->ip_src + 12);
+ ip_dst0 = (const uint32_t *) k->ip_dst;
+ ip_dst1 = (const uint32_t *)(k->ip_dst + 4);
+ ip_dst2 = (const uint32_t *)(k->ip_dst + 8);
+ ip_dst3 = (const uint32_t *)(k->ip_dst + 12);
+ init_val = rte_hash_crc_4byte(t, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src0, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src1, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src2, init_val);
+ init_val = rte_hash_crc_4byte(*ip_src3, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst0, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst1, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst2, init_val);
+ init_val = rte_hash_crc_4byte(*ip_dst3, init_val);
+ init_val = rte_hash_crc_4byte(*p, init_val);
+#else /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ init_val = rte_jhash_1word(t, init_val);
+ init_val = rte_jhash(k->ip_src, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
+ init_val = rte_jhash(k->ip_dst, sizeof(uint8_t) * IPV6_ADDR_LEN, init_val);
+ init_val = rte_jhash_1word(*p, init_val);
+#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */
+ return init_val;
+}
+
+#define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
+#define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
+
+static uint8_t ipv4_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+static uint8_t ipv6_l3fwd_out_if[L3FWD_HASH_ENTRIES] __rte_cache_aligned;
+
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+struct ipv4_l3fwd_route {
+ uint32_t ip;
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+struct ipv6_l3fwd_route {
+ uint8_t ip[16];
+ uint8_t depth;
+ uint8_t if_out;
+};
+
+static struct ipv4_l3fwd_route ipv4_l3fwd_route_array[] = {
+ {IPv4(1, 1, 1, 0), 24, 0},
+ {IPv4(2, 1, 1, 0), 24, 1},
+ {IPv4(3, 1, 1, 0), 24, 2},
+ {IPv4(4, 1, 1, 0), 24, 3},
+ {IPv4(5, 1, 1, 0), 24, 4},
+ {IPv4(6, 1, 1, 0), 24, 5},
+ {IPv4(7, 1, 1, 0), 24, 6},
+ {IPv4(8, 1, 1, 0), 24, 7},
+};
+
+static struct ipv6_l3fwd_route ipv6_l3fwd_route_array[] = {
+ {{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 0},
+ {{2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 1},
+ {{3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 2},
+ {{4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 3},
+ {{5, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 4},
+ {{6, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 5},
+ {{7, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 6},
+ {{8, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 48, 7},
+};
+
+#define IPV4_L3FWD_NUM_ROUTES RTE_DIM(ipv4_l3fwd_route_array)
+#define IPV6_L3FWD_NUM_ROUTES RTE_DIM(ipv6_l3fwd_route_array)
+
+#define IPV4_L3FWD_LPM_MAX_RULES 1024
+#define IPV6_L3FWD_LPM_MAX_RULES 1024
+#define IPV6_L3FWD_LPM_NUMBER_TBL8S (1 << 16)
+
+typedef struct rte_lpm lookup_struct_t;
+typedef struct rte_lpm6 lookup6_struct_t;
+static lookup_struct_t *ipv4_l3fwd_lookup_struct[NB_SOCKETS];
+static lookup6_struct_t *ipv6_l3fwd_lookup_struct[NB_SOCKETS];
+#endif
+
+struct lcore_conf {
+ lookup_struct_t *ipv4_lookup_struct;
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+ lookup6_struct_t *ipv6_lookup_struct;
+#else
+ lookup_struct_t *ipv6_lookup_struct;
+#endif
+ void *data;
+} __rte_cache_aligned;
+
+static struct lcore_conf lcore_conf[RTE_MAX_LCORE];
+RTE_DEFINE_PER_LCORE(struct lcore_conf *, lcore_conf);
+
+#define MAX_RX_QUEUE_PER_THREAD 16
+#define MAX_TX_PORT_PER_THREAD RTE_MAX_ETHPORTS
+#define MAX_TX_QUEUE_PER_PORT RTE_MAX_ETHPORTS
+#define MAX_RX_QUEUE_PER_PORT 128
+
+#define MAX_RX_THREAD 1024
+#define MAX_TX_THREAD 1024
+#define MAX_THREAD (MAX_RX_THREAD + MAX_TX_THREAD)
+
+/**
+ * Producer and consumer thread configuration
+ */
+static int lthreads_on = 1; /**< Use lthreads for processing */
+
+rte_atomic16_t rx_counter; /**< Number of spawned rx threads */
+rte_atomic16_t tx_counter; /**< Number of spawned tx threads */
+
+struct thread_conf {
+ uint16_t lcore_id; /**< Initial lcore for rx thread */
+ uint16_t cpu_id; /**< Cpu id for cpu load stats counter */
+ uint16_t thread_id; /**< Thread ID */
+
+#if (APP_CPU_LOAD > 0)
+ int busy[MAX_CPU_COUNTER];
+#endif
+};
+
+struct thread_rx_conf {
+ struct thread_conf conf;
+
+ uint16_t n_rx_queue;
+ struct lcore_rx_queue rx_queue_list[MAX_RX_QUEUE_PER_LCORE];
+
+ uint16_t n_ring; /**< Number of output rings */
+ struct rte_ring *ring[RTE_MAX_LCORE];
+ struct lthread_cond *ready[RTE_MAX_LCORE];
+
+#if (APP_CPU_LOAD > 0)
+ int busy[MAX_CPU_COUNTER];
+#endif
+} __rte_cache_aligned;
+
+uint16_t n_rx_thread;
+struct thread_rx_conf rx_thread[MAX_RX_THREAD];
+
+struct thread_tx_conf {
+ struct thread_conf conf;
+
+ uint16_t tx_queue_id[RTE_MAX_LCORE];
+ struct mbuf_table tx_mbufs[RTE_MAX_LCORE];
+
+ struct rte_ring *ring;
+ struct lthread_cond **ready;
+
+} __rte_cache_aligned;
+
+uint16_t n_tx_thread;
+struct thread_tx_conf tx_thread[MAX_TX_THREAD];
+
+/* Send burst of packets on an output interface */
+static inline int
+send_burst(struct thread_tx_conf *qconf, uint16_t n, uint8_t port)
+{
+ struct rte_mbuf **m_table;
+ int ret;
+ uint16_t queueid;
+
+ queueid = qconf->tx_queue_id[port];
+ m_table = (struct rte_mbuf **)qconf->tx_mbufs[port].m_table;
+
+ ret = rte_eth_tx_burst(port, queueid, m_table, n);
+ if (unlikely(ret < n)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < n);
+ }
+
+ return 0;
+}
+
+/* Enqueue a single packet, and send burst if queue is filled */
+static inline int
+send_single_packet(struct rte_mbuf *m, uint8_t port)
+{
+ uint16_t len;
+ struct thread_tx_conf *qconf;
+
+ if (lthreads_on)
+ qconf = (struct thread_tx_conf *)lthread_get_data();
+ else
+ qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
+
+ len = qconf->tx_mbufs[port].len;
+ qconf->tx_mbufs[port].m_table[len] = m;
+ len++;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+ send_burst(qconf, MAX_PKT_BURST, port);
+ len = 0;
+ }
+
+ qconf->tx_mbufs[port].len = len;
+ return 0;
+}
+
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
+ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+static inline __attribute__((always_inline)) void
+send_packetsx4(uint8_t port,
+ struct rte_mbuf *m[], uint32_t num)
+{
+ uint32_t len, j, n;
+ struct thread_tx_conf *qconf;
+
+ if (lthreads_on)
+ qconf = (struct thread_tx_conf *)lthread_get_data();
+ else
+ qconf = (struct thread_tx_conf *)RTE_PER_LCORE(lcore_conf)->data;
+
+ len = qconf->tx_mbufs[port].len;
+
+ /*
+ * If the TX buffer for that queue is empty, and we have enough packets,
+ * then send them straight away.
+ */
+ if (num >= MAX_TX_BURST && len == 0) {
+ n = rte_eth_tx_burst(port, qconf->tx_queue_id[port], m, num);
+ if (unlikely(n < num)) {
+ do {
+ rte_pktmbuf_free(m[n]);
+ } while (++n < num);
+ }
+ return;
+ }
+
+ /*
+ * Put packets into TX buffer for that queue.
+ */
+
+ n = len + num;
+ n = (n > MAX_PKT_BURST) ? MAX_PKT_BURST - len : num;
+
+ j = 0;
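+ /*
+ * The switch below jumps into the middle of the while loop (a Duff's
+ * device): the first, partial pass copies the n % FWDSTEP remainder,
+ * after which every full pass copies FWDSTEP (4) mbufs.
+ */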
+ switch (n % FWDSTEP) {
+ while (j < n) {
+ case 0:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 3:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 2:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ case 1:
+ qconf->tx_mbufs[port].m_table[len + j] = m[j];
+ j++;
+ }
+ }
+
+ len += n;
+
+ /* enough pkts to be sent */
+ if (unlikely(len == MAX_PKT_BURST)) {
+
+ send_burst(qconf, MAX_PKT_BURST, port);
+
+ /* copy rest of the packets into the TX buffer. */
+ len = num - n;
+ j = 0;
+ switch (len % FWDSTEP) {
+ while (j < len) {
+ case 0:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 3:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 2:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ case 1:
+ qconf->tx_mbufs[port].m_table[j] = m[n + j];
+ j++;
+ }
+ }
+ }
+
+ qconf->tx_mbufs[port].len = len;
+}
+#endif /* APP_LOOKUP_LPM */
+
+#ifdef DO_RFC_1812_CHECKS
+static inline int
+is_valid_ipv4_pkt(struct ipv4_hdr *pkt, uint32_t link_len)
+{
+ /* From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2 */
+ /*
+ * 1. The packet length reported by the Link Layer must be large
+ * enough to hold the minimum length legal IP datagram (20 bytes).
+ */
+ if (link_len < sizeof(struct ipv4_hdr))
+ return -1;
+
+ /* 2. The IP checksum must be correct. */
+ /* this is checked in H/W */
+
+ /*
+ * 3. The IP version number must be 4. If the version number is not 4
+ * then the packet may be another version of IP, such as IPng or
+ * ST-II.
+ */
+ if (((pkt->version_ihl) >> 4) != 4)
+ return -3;
+ /*
+ * 4. The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ */
+ if ((pkt->version_ihl & 0xf) < 5)
+ return -4;
+
+ /*
+ * 5. The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ */
+ if (rte_cpu_to_be_16(pkt->total_length) < sizeof(struct ipv4_hdr))
+ return -5;
+
+ return 0;
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+
+static __m128i mask0;
+static __m128i mask1;
+static __m128i mask2;
+static inline uint8_t
+get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid,
+ lookup_struct_t *ipv4_l3fwd_lookup_struct)
+{
+ int ret = 0;
+ union ipv4_5tuple_host key;
+
+ ipv4_hdr = (uint8_t *)ipv4_hdr + offsetof(struct ipv4_hdr, time_to_live);
+ __m128i data = _mm_loadu_si128((__m128i *)(ipv4_hdr));
+ /* Get 5 tuple: dst port, src port, dst IP address, src IP address and
+ protocol */
+ key.xmm = _mm_and_si128(data, mask0);
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv4_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0) ? portid : ipv4_l3fwd_out_if[ret]);
+}
+
+static inline uint8_t
+get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid,
+ lookup_struct_t *ipv6_l3fwd_lookup_struct)
+{
+ int ret = 0;
+ union ipv6_5tuple_host key;
+
+ ipv6_hdr = (uint8_t *)ipv6_hdr + offsetof(struct ipv6_hdr, payload_len);
+ __m128i data0 = _mm_loadu_si128((__m128i *)(ipv6_hdr));
+ __m128i data1 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
+ sizeof(__m128i)));
+ __m128i data2 = _mm_loadu_si128((__m128i *)(((uint8_t *)ipv6_hdr) +
+ sizeof(__m128i) + sizeof(__m128i)));
+ /* Get part of 5 tuple: src IP address lower 96 bits and protocol */
+ key.xmm[0] = _mm_and_si128(data0, mask1);
+ /* Get part of 5 tuple: dst IP address lower 96 bits and src IP address
+ higher 32 bits */
+ key.xmm[1] = data1;
+ /* Get part of 5 tuple: dst port and src port and dst IP address higher
+ 32 bits */
+ key.xmm[2] = _mm_and_si128(data2, mask2);
+
+ /* Find destination port */
+ ret = rte_hash_lookup(ipv6_l3fwd_lookup_struct, (const void *)&key);
+ return (uint8_t)((ret < 0) ? portid : ipv6_l3fwd_out_if[ret]);
+}
+#endif
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+
+static inline uint8_t
+get_ipv4_dst_port(void *ipv4_hdr, uint8_t portid,
+ lookup_struct_t *ipv4_l3fwd_lookup_struct)
+{
+ uint32_t next_hop;
+
+ return (uint8_t)((rte_lpm_lookup(ipv4_l3fwd_lookup_struct,
+ rte_be_to_cpu_32(((struct ipv4_hdr *)ipv4_hdr)->dst_addr),
+ &next_hop) == 0) ? next_hop : portid);
+}
+
+static inline uint8_t
+get_ipv6_dst_port(void *ipv6_hdr, uint8_t portid,
+ lookup6_struct_t *ipv6_l3fwd_lookup_struct)
+{
+ uint8_t next_hop;
+
+ return (uint8_t) ((rte_lpm6_lookup(ipv6_l3fwd_lookup_struct,
+ ((struct ipv6_hdr *)ipv6_hdr)->dst_addr, &next_hop) == 0) ?
+ next_hop : portid);
+}
+#endif
+
+static inline void l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
+ __attribute__((unused));
+
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH) && \
+ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+
+#define MASK_ALL_PKTS 0xff
+#define EXCLUDE_1ST_PKT 0xfe
+#define EXCLUDE_2ND_PKT 0xfd
+#define EXCLUDE_3RD_PKT 0xfb
+#define EXCLUDE_4TH_PKT 0xf7
+#define EXCLUDE_5TH_PKT 0xef
+#define EXCLUDE_6TH_PKT 0xdf
+#define EXCLUDE_7TH_PKT 0xbf
+#define EXCLUDE_8TH_PKT 0x7f
+
+static inline void
+simple_ipv4_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
+{
+ struct ether_hdr *eth_hdr[8];
+ struct ipv4_hdr *ipv4_hdr[8];
+ uint8_t dst_port[8];
+ int32_t ret[8];
+ union ipv4_5tuple_host key[8];
+ __m128i data[8];
+
+ eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
+ eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
+ eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
+ eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
+
+ /* Handle IPv4 headers.*/
+ ipv4_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+ ipv4_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ uint8_t valid_mask = MASK_ALL_PKTS;
+
+ if (is_valid_ipv4_pkt(ipv4_hdr[0], m[0]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[0]);
+ valid_mask &= EXCLUDE_1ST_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[1], m[1]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[1]);
+ valid_mask &= EXCLUDE_2ND_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[2], m[2]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[2]);
+ valid_mask &= EXCLUDE_3RD_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[3], m[3]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[3]);
+ valid_mask &= EXCLUDE_4TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[4], m[4]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[4]);
+ valid_mask &= EXCLUDE_5TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[5], m[5]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[5]);
+ valid_mask &= EXCLUDE_6TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[6], m[6]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[6]);
+ valid_mask &= EXCLUDE_7TH_PKT;
+ }
+ if (is_valid_ipv4_pkt(ipv4_hdr[7], m[7]->pkt_len) < 0) {
+ rte_pktmbuf_free(m[7]);
+ valid_mask &= EXCLUDE_8TH_PKT;
+ }
+ if (unlikely(valid_mask != MASK_ALL_PKTS)) {
+ if (valid_mask == 0)
+ return;
+
+ uint8_t i = 0;
+
+		for (i = 0; i < 8; i++)
+			if ((0x1 << i) & valid_mask)
+				l3fwd_simple_forward(m[i], portid);
+
+		/* Invalid packets were freed and valid ones sent; stop here. */
+		return;
+	}
+#endif /* End of #ifdef DO_RFC_1812_CHECKS */
+
+ data[0] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[0], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[1] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[1], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[2] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[2], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[3] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[3], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[4] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[4], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[5] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[5], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[6] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[6], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+ data[7] = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m[7], __m128i *,
+ sizeof(struct ether_hdr) +
+ offsetof(struct ipv4_hdr, time_to_live)));
+
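+	/*
+	 * mask0 (set up when the hash is populated) zeroes everything in the
+	 * 16 bytes loaded at time_to_live except the protocol byte and the
+	 * source/destination addresses and ports, i.e. the 5-tuple fields.
+	 */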
+ key[0].xmm = _mm_and_si128(data[0], mask0);
+ key[1].xmm = _mm_and_si128(data[1], mask0);
+ key[2].xmm = _mm_and_si128(data[2], mask0);
+ key[3].xmm = _mm_and_si128(data[3], mask0);
+ key[4].xmm = _mm_and_si128(data[4], mask0);
+ key[5].xmm = _mm_and_si128(data[5], mask0);
+ key[6].xmm = _mm_and_si128(data[6], mask0);
+ key[7].xmm = _mm_and_si128(data[7], mask0);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
+
+ rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
+ &key_array[0], 8, ret);
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv4_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv4_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv4_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv4_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv4_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv4_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv4_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv4_l3fwd_out_if[ret[7]]);
+
+ if (dst_port[0] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[0]) == 0)
+ dst_port[0] = portid;
+ if (dst_port[1] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[1]) == 0)
+ dst_port[1] = portid;
+ if (dst_port[2] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[2]) == 0)
+ dst_port[2] = portid;
+ if (dst_port[3] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[3]) == 0)
+ dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr[0]->time_to_live);
+ --(ipv4_hdr[1]->time_to_live);
+ --(ipv4_hdr[2]->time_to_live);
+ --(ipv4_hdr[3]->time_to_live);
+ ++(ipv4_hdr[0]->hdr_checksum);
+ ++(ipv4_hdr[1]->hdr_checksum);
+ ++(ipv4_hdr[2]->hdr_checksum);
+ ++(ipv4_hdr[3]->hdr_checksum);
+ --(ipv4_hdr[4]->time_to_live);
+ --(ipv4_hdr[5]->time_to_live);
+ --(ipv4_hdr[6]->time_to_live);
+ --(ipv4_hdr[7]->time_to_live);
+ ++(ipv4_hdr[4]->hdr_checksum);
+ ++(ipv4_hdr[5]->hdr_checksum);
+ ++(ipv4_hdr[6]->hdr_checksum);
+ ++(ipv4_hdr[7]->hdr_checksum);
+#endif
+
+ /* dst addr */
+ *(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
+ *(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
+ *(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
+ *(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
+
+ send_single_packet(m[0], (uint8_t)dst_port[0]);
+ send_single_packet(m[1], (uint8_t)dst_port[1]);
+ send_single_packet(m[2], (uint8_t)dst_port[2]);
+ send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
+
+}
+
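+/*
+ * Extract the IPv6 5-tuple from three unaligned 16-byte loads starting at
+ * payload_len: the first mask keeps the protocol (next header) byte and the
+ * lower 96 bits of the source address, the second keeps the higher 32 bits
+ * of the destination address plus the source and destination ports.
+ */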
+static inline void get_ipv6_5tuple(struct rte_mbuf *m0, __m128i mask0,
+ __m128i mask1, union ipv6_5tuple_host *key)
+{
+ __m128i tmpdata0 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
+ __m128i *, sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len)));
+ __m128i tmpdata1 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
+ __m128i *, sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i)));
+ __m128i tmpdata2 = _mm_loadu_si128(rte_pktmbuf_mtod_offset(m0,
+ __m128i *, sizeof(struct ether_hdr) +
+ offsetof(struct ipv6_hdr, payload_len) + sizeof(__m128i) +
+ sizeof(__m128i)));
+ key->xmm[0] = _mm_and_si128(tmpdata0, mask0);
+ key->xmm[1] = tmpdata1;
+ key->xmm[2] = _mm_and_si128(tmpdata2, mask1);
+}
+
+static inline void
+simple_ipv6_fwd_8pkts(struct rte_mbuf *m[8], uint8_t portid)
+{
+ int32_t ret[8];
+ uint8_t dst_port[8];
+ struct ether_hdr *eth_hdr[8];
+ union ipv6_5tuple_host key[8];
+
+ __attribute__((unused)) struct ipv6_hdr *ipv6_hdr[8];
+
+ eth_hdr[0] = rte_pktmbuf_mtod(m[0], struct ether_hdr *);
+ eth_hdr[1] = rte_pktmbuf_mtod(m[1], struct ether_hdr *);
+ eth_hdr[2] = rte_pktmbuf_mtod(m[2], struct ether_hdr *);
+ eth_hdr[3] = rte_pktmbuf_mtod(m[3], struct ether_hdr *);
+ eth_hdr[4] = rte_pktmbuf_mtod(m[4], struct ether_hdr *);
+ eth_hdr[5] = rte_pktmbuf_mtod(m[5], struct ether_hdr *);
+ eth_hdr[6] = rte_pktmbuf_mtod(m[6], struct ether_hdr *);
+ eth_hdr[7] = rte_pktmbuf_mtod(m[7], struct ether_hdr *);
+
+ /* Handle IPv6 headers.*/
+ ipv6_hdr[0] = rte_pktmbuf_mtod_offset(m[0], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[1] = rte_pktmbuf_mtod_offset(m[1], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[2] = rte_pktmbuf_mtod_offset(m[2], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[3] = rte_pktmbuf_mtod_offset(m[3], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[4] = rte_pktmbuf_mtod_offset(m[4], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[5] = rte_pktmbuf_mtod_offset(m[5], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[6] = rte_pktmbuf_mtod_offset(m[6], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+ ipv6_hdr[7] = rte_pktmbuf_mtod_offset(m[7], struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ get_ipv6_5tuple(m[0], mask1, mask2, &key[0]);
+ get_ipv6_5tuple(m[1], mask1, mask2, &key[1]);
+ get_ipv6_5tuple(m[2], mask1, mask2, &key[2]);
+ get_ipv6_5tuple(m[3], mask1, mask2, &key[3]);
+ get_ipv6_5tuple(m[4], mask1, mask2, &key[4]);
+ get_ipv6_5tuple(m[5], mask1, mask2, &key[5]);
+ get_ipv6_5tuple(m[6], mask1, mask2, &key[6]);
+ get_ipv6_5tuple(m[7], mask1, mask2, &key[7]);
+
+ const void *key_array[8] = {&key[0], &key[1], &key[2], &key[3],
+ &key[4], &key[5], &key[6], &key[7]};
+
+ rte_hash_lookup_multi(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
+			&key_array[0], 8, ret);
+ dst_port[0] = (uint8_t) ((ret[0] < 0) ? portid : ipv6_l3fwd_out_if[ret[0]]);
+ dst_port[1] = (uint8_t) ((ret[1] < 0) ? portid : ipv6_l3fwd_out_if[ret[1]]);
+ dst_port[2] = (uint8_t) ((ret[2] < 0) ? portid : ipv6_l3fwd_out_if[ret[2]]);
+ dst_port[3] = (uint8_t) ((ret[3] < 0) ? portid : ipv6_l3fwd_out_if[ret[3]]);
+ dst_port[4] = (uint8_t) ((ret[4] < 0) ? portid : ipv6_l3fwd_out_if[ret[4]]);
+ dst_port[5] = (uint8_t) ((ret[5] < 0) ? portid : ipv6_l3fwd_out_if[ret[5]]);
+ dst_port[6] = (uint8_t) ((ret[6] < 0) ? portid : ipv6_l3fwd_out_if[ret[6]]);
+ dst_port[7] = (uint8_t) ((ret[7] < 0) ? portid : ipv6_l3fwd_out_if[ret[7]]);
+
+ if (dst_port[0] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[0]) == 0)
+ dst_port[0] = portid;
+ if (dst_port[1] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[1]) == 0)
+ dst_port[1] = portid;
+ if (dst_port[2] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[2]) == 0)
+ dst_port[2] = portid;
+ if (dst_port[3] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[3]) == 0)
+ dst_port[3] = portid;
+ if (dst_port[4] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[4]) == 0)
+ dst_port[4] = portid;
+ if (dst_port[5] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[5]) == 0)
+ dst_port[5] = portid;
+ if (dst_port[6] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[6]) == 0)
+ dst_port[6] = portid;
+ if (dst_port[7] >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port[7]) == 0)
+ dst_port[7] = portid;
+
+ /* dst addr */
+ *(uint64_t *)&eth_hdr[0]->d_addr = dest_eth_addr[dst_port[0]];
+ *(uint64_t *)&eth_hdr[1]->d_addr = dest_eth_addr[dst_port[1]];
+ *(uint64_t *)&eth_hdr[2]->d_addr = dest_eth_addr[dst_port[2]];
+ *(uint64_t *)&eth_hdr[3]->d_addr = dest_eth_addr[dst_port[3]];
+ *(uint64_t *)&eth_hdr[4]->d_addr = dest_eth_addr[dst_port[4]];
+ *(uint64_t *)&eth_hdr[5]->d_addr = dest_eth_addr[dst_port[5]];
+ *(uint64_t *)&eth_hdr[6]->d_addr = dest_eth_addr[dst_port[6]];
+ *(uint64_t *)&eth_hdr[7]->d_addr = dest_eth_addr[dst_port[7]];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port[0]], &eth_hdr[0]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[1]], &eth_hdr[1]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[2]], &eth_hdr[2]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[3]], &eth_hdr[3]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[4]], &eth_hdr[4]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[5]], &eth_hdr[5]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[6]], &eth_hdr[6]->s_addr);
+ ether_addr_copy(&ports_eth_addr[dst_port[7]], &eth_hdr[7]->s_addr);
+
+ send_single_packet(m[0], (uint8_t)dst_port[0]);
+ send_single_packet(m[1], (uint8_t)dst_port[1]);
+ send_single_packet(m[2], (uint8_t)dst_port[2]);
+ send_single_packet(m[3], (uint8_t)dst_port[3]);
+ send_single_packet(m[4], (uint8_t)dst_port[4]);
+ send_single_packet(m[5], (uint8_t)dst_port[5]);
+ send_single_packet(m[6], (uint8_t)dst_port[6]);
+ send_single_packet(m[7], (uint8_t)dst_port[7]);
+
+}
+#endif /* APP_LOOKUP_METHOD */
+
+static inline __attribute__((always_inline)) void
+l3fwd_simple_forward(struct rte_mbuf *m, uint8_t portid)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ uint8_t dst_port;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ if (RTE_ETH_IS_IPV4_HDR(m->packet_type)) {
+ /* Handle IPv4 headers.*/
+ ipv4_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *,
+ sizeof(struct ether_hdr));
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Check to make sure the packet is valid (RFC1812) */
+ if (is_valid_ipv4_pkt(ipv4_hdr, m->pkt_len) < 0) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+#endif
+
+ dst_port = get_ipv4_dst_port(ipv4_hdr, portid,
+ RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct);
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+#ifdef DO_RFC_1812_CHECKS
+ /* Update time to live and header checksum */
+ --(ipv4_hdr->time_to_live);
+ ++(ipv4_hdr->hdr_checksum);
+#endif
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+ } else if (RTE_ETH_IS_IPV6_HDR(m->packet_type)) {
+ /* Handle IPv6 headers.*/
+ struct ipv6_hdr *ipv6_hdr;
+
+ ipv6_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *,
+ sizeof(struct ether_hdr));
+
+ dst_port = get_ipv6_dst_port(ipv6_hdr, portid,
+ RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct);
+
+ if (dst_port >= RTE_MAX_ETHPORTS ||
+ (enabled_port_mask & 1 << dst_port) == 0)
+ dst_port = portid;
+
+ /* dst addr */
+ *(uint64_t *)&eth_hdr->d_addr = dest_eth_addr[dst_port];
+
+ /* src addr */
+ ether_addr_copy(&ports_eth_addr[dst_port], &eth_hdr->s_addr);
+
+ send_single_packet(m, dst_port);
+ } else
+ /* Free the mbuf that contains non-IPV4/IPV6 packet */
+ rte_pktmbuf_free(m);
+}
+
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
+ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+#ifdef DO_RFC_1812_CHECKS
+
+#define IPV4_MIN_VER_IHL 0x45
+#define IPV4_MAX_VER_IHL 0x4f
+#define IPV4_MAX_VER_IHL_DIFF (IPV4_MAX_VER_IHL - IPV4_MIN_VER_IHL)
+
+/* Minimum value of IPV4 total length (20B) in network byte order. */
+#define IPV4_MIN_LEN_BE (sizeof(struct ipv4_hdr) << 8)
+
+/*
+ * From http://www.rfc-editor.org/rfc/rfc1812.txt section 5.2.2:
+ * - The IP version number must be 4.
+ * - The IP header length field must be large enough to hold the
+ * minimum length legal IP datagram (20 bytes = 5 words).
+ * - The IP total length field must be large enough to hold the IP
+ * datagram header, whose length is specified in the IP header length
+ * field.
+ * If we encounter an invalid IPV4 packet, then set its destination port
+ * to the BAD_PORT value.
+ */
+static inline __attribute__((always_inline)) void
+rfc1812_process(struct ipv4_hdr *ipv4_hdr, uint32_t *dp, uint32_t ptype)
+{
+ uint8_t ihl;
+
+ if (RTE_ETH_IS_IPV4_HDR(ptype)) {
+ ihl = ipv4_hdr->version_ihl - IPV4_MIN_VER_IHL;
+
+ ipv4_hdr->time_to_live--;
+ ipv4_hdr->hdr_checksum++;
+
+ if (ihl > IPV4_MAX_VER_IHL_DIFF ||
+ ((uint8_t)ipv4_hdr->total_length == 0 &&
+ ipv4_hdr->total_length < IPV4_MIN_LEN_BE)) {
+ dp[0] = BAD_PORT;
+ }
+ }
+}
+
+#else
+#define	rfc1812_process(mb, dp, ptype)	do { } while (0)
+#endif /* DO_RFC_1812_CHECKS */
+#endif /* APP_LOOKUP_LPM && ENABLE_MULTI_BUFFER_OPTIMIZE */
+
+
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
+ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+
+static inline __attribute__((always_inline)) uint16_t
+get_dst_port(struct rte_mbuf *pkt, uint32_t dst_ipv4, uint8_t portid)
+{
+ uint32_t next_hop_ipv4;
+ uint8_t next_hop_ipv6;
+ struct ipv6_hdr *ipv6_hdr;
+ struct ether_hdr *eth_hdr;
+
+	if (RTE_ETH_IS_IPV4_HDR(pkt->packet_type)) {
+		/* On LPM lookup miss, fall back to the input port. */
+		if (rte_lpm_lookup(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct,
+				dst_ipv4, &next_hop_ipv4) != 0)
+			next_hop_ipv4 = portid;
+		return next_hop_ipv4;
+	} else if (RTE_ETH_IS_IPV6_HDR(pkt->packet_type)) {
+		eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+		ipv6_hdr = (struct ipv6_hdr *)(eth_hdr + 1);
+		/* On LPM6 lookup miss, fall back to the input port. */
+		if (rte_lpm6_lookup(RTE_PER_LCORE(lcore_conf)->ipv6_lookup_struct,
+				ipv6_hdr->dst_addr, &next_hop_ipv6) != 0)
+			next_hop_ipv6 = portid;
+		return next_hop_ipv6;
+	}
+
+	/* Non-IP packets keep the input port as destination. */
+	return portid;
+}
+
+static inline void
+process_packet(struct rte_mbuf *pkt, uint32_t *dst_port, uint8_t portid)
+{
+ struct ether_hdr *eth_hdr;
+ struct ipv4_hdr *ipv4_hdr;
+ uint32_t dst_ipv4;
+ uint16_t dp;
+ __m128i te, ve;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+
+ dst_ipv4 = ipv4_hdr->dst_addr;
+ dst_ipv4 = rte_be_to_cpu_32(dst_ipv4);
+ dp = get_dst_port(pkt, dst_ipv4, portid);
+
+ te = _mm_load_si128((__m128i *)eth_hdr);
+ ve = val_eth[dp];
+
+ dst_port[0] = dp;
+ rfc1812_process(ipv4_hdr, dst_port, pkt->packet_type);
+
+ te = _mm_blend_epi16(te, ve, MASK_ETH);
+ _mm_store_si128((__m128i *)eth_hdr, te);
+}
+
+/*
+ * Read packet_type and destination IPV4 addresses from 4 mbufs.
+ */
+static inline void
+processx4_step1(struct rte_mbuf *pkt[FWDSTEP],
+ __m128i *dip,
+ uint32_t *ipv4_flag)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ether_hdr *eth_hdr;
+ uint32_t x0, x1, x2, x3;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[0], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x0 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] = pkt[0]->packet_type & RTE_PTYPE_L3_IPV4;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[1], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x1 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[1]->packet_type;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[2], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x2 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[2]->packet_type;
+
+ eth_hdr = rte_pktmbuf_mtod(pkt[3], struct ether_hdr *);
+ ipv4_hdr = (struct ipv4_hdr *)(eth_hdr + 1);
+ x3 = ipv4_hdr->dst_addr;
+ ipv4_flag[0] &= pkt[3]->packet_type;
+
+ dip[0] = _mm_set_epi32(x3, x2, x1, x0);
+}
+
+/*
+ * Lookup into LPM for destination port.
+ * If lookup fails, use incoming port (portid) as destination port.
+ */
+static inline void
+processx4_step2(__m128i dip,
+ uint32_t ipv4_flag,
+ uint32_t portid,
+ struct rte_mbuf *pkt[FWDSTEP],
+ uint32_t dprt[FWDSTEP])
+{
+ rte_xmm_t dst;
+ const __m128i bswap_mask = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
+ 4, 5, 6, 7, 0, 1, 2, 3);
+
+ /* Byte swap 4 IPV4 addresses. */
+ dip = _mm_shuffle_epi8(dip, bswap_mask);
+
+ /* if all 4 packets are IPV4. */
+ if (likely(ipv4_flag)) {
+ rte_lpm_lookupx4(RTE_PER_LCORE(lcore_conf)->ipv4_lookup_struct, dip,
+ dprt, portid);
+ } else {
+ dst.x = dip;
+ dprt[0] = get_dst_port(pkt[0], dst.u32[0], portid);
+ dprt[1] = get_dst_port(pkt[1], dst.u32[1], portid);
+ dprt[2] = get_dst_port(pkt[2], dst.u32[2], portid);
+ dprt[3] = get_dst_port(pkt[3], dst.u32[3], portid);
+ }
+}
+
+/*
+ * Update source and destination MAC addresses in the ethernet header.
+ * Perform RFC1812 checks and updates for IPV4 packets.
+ */
+static inline void
+processx4_step3(struct rte_mbuf *pkt[FWDSTEP], uint32_t dst_port[FWDSTEP])
+{
+ __m128i te[FWDSTEP];
+ __m128i ve[FWDSTEP];
+ __m128i *p[FWDSTEP];
+
+ p[0] = rte_pktmbuf_mtod(pkt[0], __m128i *);
+ p[1] = rte_pktmbuf_mtod(pkt[1], __m128i *);
+ p[2] = rte_pktmbuf_mtod(pkt[2], __m128i *);
+ p[3] = rte_pktmbuf_mtod(pkt[3], __m128i *);
+
+ ve[0] = val_eth[dst_port[0]];
+ te[0] = _mm_load_si128(p[0]);
+
+ ve[1] = val_eth[dst_port[1]];
+ te[1] = _mm_load_si128(p[1]);
+
+ ve[2] = val_eth[dst_port[2]];
+ te[2] = _mm_load_si128(p[2]);
+
+ ve[3] = val_eth[dst_port[3]];
+ te[3] = _mm_load_si128(p[3]);
+
+	/* Update the first 12 bytes, keep the remaining bytes intact. */
+ te[0] = _mm_blend_epi16(te[0], ve[0], MASK_ETH);
+ te[1] = _mm_blend_epi16(te[1], ve[1], MASK_ETH);
+ te[2] = _mm_blend_epi16(te[2], ve[2], MASK_ETH);
+ te[3] = _mm_blend_epi16(te[3], ve[3], MASK_ETH);
+
+ _mm_store_si128(p[0], te[0]);
+ _mm_store_si128(p[1], te[1]);
+ _mm_store_si128(p[2], te[2]);
+ _mm_store_si128(p[3], te[3]);
+
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[0] + 1),
+ &dst_port[0], pkt[0]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[1] + 1),
+ &dst_port[1], pkt[1]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[2] + 1),
+ &dst_port[2], pkt[2]->packet_type);
+ rfc1812_process((struct ipv4_hdr *)((struct ether_hdr *)p[3] + 1),
+ &dst_port[3], pkt[3]->packet_type);
+}
+
+/*
+ * We group consecutive packets with the same destination port into one burst.
+ * To avoid extra latency this is done together with some other packet
+ * processing, but after we have made a final decision about each packet's
+ * destination. To do this we maintain:
+ * pnum - array holding the number of consecutive packets with the same dest
+ * port for each packet in the input burst.
+ * lp - pointer to the last updated element in pnum.
+ * dlp - dest port value that lp corresponds to.
+ */
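+/*
+ * Illustrative example: for dst_port[] = {1, 1, 1, 2, 2, 2, 2, 3} the bursts
+ * are 3, 4 and 1 packets long, so pnum[0] = 3, pnum[3] = 4, pnum[7] = 1 and
+ * the remaining pnum[] entries are never read.
+ */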
+
+#define GRPSZ (1 << FWDSTEP)
+#define GRPMSK (GRPSZ - 1)
+
+#define GROUP_PORT_STEP(dlp, dcp, lp, pn, idx) do { \
+ if (likely((dlp) == (dcp)[(idx)])) { \
+ (lp)[0]++; \
+ } else { \
+ (dlp) = (dcp)[idx]; \
+ (lp) = (pn) + (idx); \
+ (lp)[0] = 1; \
+ } \
+} while (0)
+
+/*
+ * Group consecutive packets with the same destination port in bursts of 4.
+ * Suppose we have an array of destination ports:
+ * dst_port[] = {a, b, c, d, e, ... }
+ * dp1 should contain <a, b, c, d>, dp2 <b, c, d, e>.
+ * We do 4 comparisons at once and the result is a 4-bit mask.
+ * This mask is used as an index into a prebuilt array of pnum values.
+ */
+static inline uint16_t *
+port_groupx4(uint16_t pn[FWDSTEP + 1], uint16_t *lp, __m128i dp1, __m128i dp2)
+{
+ static const struct {
+		uint64_t pnum; /* prebuilt 4 values for pnum[]. */
+		int32_t  idx;  /* index of the new last updated element. */
+		uint16_t lpv;  /* add value to the last updated element. */
+ } gptbl[GRPSZ] = {
+ {
+ /* 0: a != b, b != c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100010001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 1: a == b, b != c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100010002),
+ .idx = 4,
+ .lpv = 1,
+ },
+ {
+ /* 2: a != b, b == c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100020001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 3: a == b, b == c, c != d, d != e */
+ .pnum = UINT64_C(0x0001000100020003),
+ .idx = 4,
+ .lpv = 2,
+ },
+ {
+ /* 4: a != b, b != c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200010001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 5: a == b, b != c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200010002),
+ .idx = 4,
+ .lpv = 1,
+ },
+ {
+ /* 6: a != b, b == c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200030001),
+ .idx = 4,
+ .lpv = 0,
+ },
+ {
+ /* 7: a == b, b == c, c == d, d != e */
+ .pnum = UINT64_C(0x0001000200030004),
+ .idx = 4,
+ .lpv = 3,
+ },
+ {
+ /* 8: a != b, b != c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100010001),
+ .idx = 3,
+ .lpv = 0,
+ },
+ {
+ /* 9: a == b, b != c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100010002),
+ .idx = 3,
+ .lpv = 1,
+ },
+ {
+ /* 0xa: a != b, b == c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100020001),
+ .idx = 3,
+ .lpv = 0,
+ },
+ {
+ /* 0xb: a == b, b == c, c != d, d == e */
+ .pnum = UINT64_C(0x0002000100020003),
+ .idx = 3,
+ .lpv = 2,
+ },
+ {
+ /* 0xc: a != b, b != c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300010001),
+ .idx = 2,
+ .lpv = 0,
+ },
+ {
+ /* 0xd: a == b, b != c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300010002),
+ .idx = 2,
+ .lpv = 1,
+ },
+ {
+ /* 0xe: a != b, b == c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300040001),
+ .idx = 1,
+ .lpv = 0,
+ },
+ {
+ /* 0xf: a == b, b == c, c == d, d == e */
+ .pnum = UINT64_C(0x0002000300040005),
+ .idx = 0,
+ .lpv = 4,
+ },
+ };
+
+ union {
+ uint16_t u16[FWDSTEP + 1];
+ uint64_t u64;
+ } *pnum = (void *)pn;
+
+ int32_t v;
+
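+	/*
+	 * Compare dp1 and dp2 element-wise: bit i of v is set when the i-th
+	 * destination port in dp1 equals its successor (the i-th element of
+	 * dp2), matching the a/b/c/d/e annotations in gptbl above.
+	 */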
+ dp1 = _mm_cmpeq_epi16(dp1, dp2);
+ dp1 = _mm_unpacklo_epi16(dp1, dp1);
+ v = _mm_movemask_ps((__m128)dp1);
+
+ /* update last port counter. */
+ lp[0] += gptbl[v].lpv;
+
+ /* if dest port value has changed. */
+ if (v != GRPMSK) {
+ lp = pnum->u16 + gptbl[v].idx;
+ lp[0] = 1;
+ pnum->u64 = gptbl[v].pnum;
+ }
+
+ return lp;
+}
+
+#endif /* APP_LOOKUP_METHOD */
+
+static void
+process_burst(struct rte_mbuf *pkts_burst[MAX_PKT_BURST], int nb_rx,
+ uint8_t portid) {
+
+ int j;
+
+#if ((APP_LOOKUP_METHOD == APP_LOOKUP_LPM) && \
+ (ENABLE_MULTI_BUFFER_OPTIMIZE == 1))
+ int32_t k;
+ uint16_t dlp;
+ uint16_t *lp;
+ uint32_t dst_port[MAX_PKT_BURST];
+ __m128i dip[MAX_PKT_BURST / FWDSTEP];
+ uint32_t ipv4_flag[MAX_PKT_BURST / FWDSTEP];
+ uint16_t pnum[MAX_PKT_BURST + 1];
+#endif
+
+
+#if (ENABLE_MULTI_BUFFER_OPTIMIZE == 1)
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+ {
+ /*
+ * Send nb_rx - nb_rx%8 packets
+ * in groups of 8.
+ */
+ int32_t n = RTE_ALIGN_FLOOR(nb_rx, 8);
+
+ for (j = 0; j < n; j += 8) {
+ uint32_t pkt_type =
+ pkts_burst[j]->packet_type &
+ pkts_burst[j+1]->packet_type &
+ pkts_burst[j+2]->packet_type &
+ pkts_burst[j+3]->packet_type &
+ pkts_burst[j+4]->packet_type &
+ pkts_burst[j+5]->packet_type &
+ pkts_burst[j+6]->packet_type &
+ pkts_burst[j+7]->packet_type;
+ if (pkt_type & RTE_PTYPE_L3_IPV4) {
+ simple_ipv4_fwd_8pkts(&pkts_burst[j], portid);
+ } else if (pkt_type &
+ RTE_PTYPE_L3_IPV6) {
+ simple_ipv6_fwd_8pkts(&pkts_burst[j], portid);
+ } else {
+ l3fwd_simple_forward(pkts_burst[j], portid);
+ l3fwd_simple_forward(pkts_burst[j+1], portid);
+ l3fwd_simple_forward(pkts_burst[j+2], portid);
+ l3fwd_simple_forward(pkts_burst[j+3], portid);
+ l3fwd_simple_forward(pkts_burst[j+4], portid);
+ l3fwd_simple_forward(pkts_burst[j+5], portid);
+ l3fwd_simple_forward(pkts_burst[j+6], portid);
+ l3fwd_simple_forward(pkts_burst[j+7], portid);
+ }
+ }
+ for (; j < nb_rx ; j++)
+ l3fwd_simple_forward(pkts_burst[j], portid);
+ }
+#elif (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+
+ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step1(&pkts_burst[j], &dip[j / FWDSTEP],
+ &ipv4_flag[j / FWDSTEP]);
+
+ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+ for (j = 0; j != k; j += FWDSTEP)
+ processx4_step2(dip[j / FWDSTEP], ipv4_flag[j / FWDSTEP],
+ portid, &pkts_burst[j], &dst_port[j]);
+
+ /*
+ * Finish packet processing and group consecutive
+ * packets with the same destination port.
+ */
+ k = RTE_ALIGN_FLOOR(nb_rx, FWDSTEP);
+ if (k != 0) {
+ __m128i dp1, dp2;
+
+ lp = pnum;
+ lp[0] = 1;
+
+ processx4_step3(pkts_burst, dst_port);
+
+ /* dp1: <d[0], d[1], d[2], d[3], ... > */
+ dp1 = _mm_loadu_si128((__m128i *)dst_port);
+
+ for (j = FWDSTEP; j != k; j += FWDSTEP) {
+ processx4_step3(&pkts_burst[j], &dst_port[j]);
+
+ /*
+ * dp2:
+ * <d[j-3], d[j-2], d[j-1], d[j], ... >
+ */
+ dp2 = _mm_loadu_si128(
+ (__m128i *)&dst_port[j - FWDSTEP + 1]);
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * dp1:
+ * <d[j], d[j+1], d[j+2], d[j+3], ... >
+ */
+ dp1 = _mm_srli_si128(dp2, (FWDSTEP - 1) *
+ sizeof(dst_port[0]));
+ }
+
+ /*
+ * dp2: <d[j-3], d[j-2], d[j-1], d[j-1], ... >
+ */
+ dp2 = _mm_shufflelo_epi16(dp1, 0xf9);
+ lp = port_groupx4(&pnum[j - FWDSTEP], lp, dp1, dp2);
+
+ /*
+ * remove values added by the last repeated
+ * dst port.
+ */
+ lp[0]--;
+ dlp = dst_port[j - 1];
+ } else {
+		/* set dlp and lp to values that are never used. */
+ dlp = BAD_PORT - 1;
+ lp = pnum + MAX_PKT_BURST;
+ }
+
+ /* Process up to last 3 packets one by one. */
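+	/* The cases below intentionally fall through. */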
+ switch (nb_rx % FWDSTEP) {
+ case 3:
+ process_packet(pkts_burst[j], dst_port + j, portid);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ case 2:
+ process_packet(pkts_burst[j], dst_port + j, portid);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ case 1:
+ process_packet(pkts_burst[j], dst_port + j, portid);
+ GROUP_PORT_STEP(dlp, dst_port, lp, pnum, j);
+ j++;
+ }
+
+ /*
+	 * Send packets out through the destination port.
+	 * Consecutive packets with the same destination port
+	 * are already grouped together.
+	 * If the destination port for a packet equals BAD_PORT,
+	 * then free the packet without sending it out.
+ */
+ for (j = 0; j < nb_rx; j += k) {
+
+ int32_t m;
+ uint16_t pn;
+
+ pn = dst_port[j];
+ k = pnum[j];
+
+ if (likely(pn != BAD_PORT))
+ send_packetsx4(pn, pkts_burst + j, k);
+ else
+ for (m = j; m != j + k; m++)
+ rte_pktmbuf_free(pkts_burst[m]);
+
+ }
+
+#endif /* APP_LOOKUP_METHOD */
+#else /* ENABLE_MULTI_BUFFER_OPTIMIZE == 0 */
+
+ /* Prefetch first packets */
+ for (j = 0; j < PREFETCH_OFFSET && j < nb_rx; j++)
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[j], void *));
+
+ /* Prefetch and forward already prefetched packets */
+ for (j = 0; j < (nb_rx - PREFETCH_OFFSET); j++) {
+ rte_prefetch0(rte_pktmbuf_mtod(pkts_burst[
+ j + PREFETCH_OFFSET], void *));
+ l3fwd_simple_forward(pkts_burst[j], portid);
+ }
+
+ /* Forward remaining prefetched packets */
+ for (; j < nb_rx; j++)
+ l3fwd_simple_forward(pkts_burst[j], portid);
+
+#endif /* ENABLE_MULTI_BUFFER_OPTIMIZE */
+
+}
+
+#if (APP_CPU_LOAD > 0)
+
+/*
+ * CPU-load stats collector
+ */
+static int
+cpu_load_collector(__rte_unused void *arg) {
+ unsigned i, j, k;
+ uint64_t hits;
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ uint64_t total[MAX_CPU] = { 0 };
+ unsigned min_cpu = MAX_CPU;
+ unsigned max_cpu = 0;
+ unsigned cpu_id;
+ int busy_total = 0;
+ int busy_flag = 0;
+
+ unsigned int n_thread_per_cpu[MAX_CPU] = { 0 };
+ struct thread_conf *thread_per_cpu[MAX_CPU][MAX_THREAD];
+
+ struct thread_conf *thread_conf;
+
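+	/*
+	 * Number of TSC cycles in CPU_LOAD_TIMEOUT_US microseconds
+	 * (cycles per microsecond, rounded up).
+	 */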
+ const uint64_t interval_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+ US_PER_S * CPU_LOAD_TIMEOUT_US;
+
+ prev_tsc = 0;
+ /*
+ * Wait for all threads
+ */
+
+ printf("Waiting for %d rx threads and %d tx threads\n", n_rx_thread,
+ n_tx_thread);
+
+ while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+ rte_pause();
+
+ while (rte_atomic16_read(&tx_counter) < n_tx_thread)
+ rte_pause();
+
+ for (i = 0; i < n_rx_thread; i++) {
+
+ thread_conf = &rx_thread[i].conf;
+ cpu_id = thread_conf->cpu_id;
+ thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
+
+ if (cpu_id > max_cpu)
+ max_cpu = cpu_id;
+ if (cpu_id < min_cpu)
+ min_cpu = cpu_id;
+ }
+ for (i = 0; i < n_tx_thread; i++) {
+
+ thread_conf = &tx_thread[i].conf;
+ cpu_id = thread_conf->cpu_id;
+ thread_per_cpu[cpu_id][n_thread_per_cpu[cpu_id]++] = thread_conf;
+
+ if (thread_conf->cpu_id > max_cpu)
+ max_cpu = thread_conf->cpu_id;
+ if (thread_conf->cpu_id < min_cpu)
+ min_cpu = thread_conf->cpu_id;
+ }
+
+ while (1) {
+
+ cpu_load.counter++;
+ for (i = min_cpu; i <= max_cpu; i++) {
+ for (j = 0; j < MAX_CPU_COUNTER; j++) {
+ for (k = 0; k < n_thread_per_cpu[i]; k++)
+ if (thread_per_cpu[i][k]->busy[j]) {
+ busy_flag = 1;
+ break;
+ }
+ if (busy_flag) {
+ cpu_load.hits[j][i]++;
+ busy_total = 1;
+ busy_flag = 0;
+ }
+ }
+
+ if (busy_total) {
+ total[i]++;
+ busy_total = 0;
+ }
+ }
+
+ cur_tsc = rte_rdtsc();
+
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > interval_tsc)) {
+
+ printf("\033c");
+
+ printf("Cpu usage for %d rx threads and %d tx threads:\n\n",
+ n_rx_thread, n_tx_thread);
+
+ printf("cpu# proc%% poll%% overhead%%\n\n");
+
+ for (i = min_cpu; i <= max_cpu; i++) {
+ hits = 0;
+ printf("CPU %d:", i);
+ for (j = 0; j < MAX_CPU_COUNTER; j++) {
+ printf("%7" PRIu64 "",
+ cpu_load.hits[j][i] * 100 / cpu_load.counter);
+ hits += cpu_load.hits[j][i];
+ cpu_load.hits[j][i] = 0;
+ }
+ printf("%7" PRIu64 "\n",
+ 100 - total[i] * 100 / cpu_load.counter);
+ total[i] = 0;
+ }
+ cpu_load.counter = 0;
+
+ prev_tsc = cur_tsc;
+ }
+
+ }
+}
+#endif /* APP_CPU_LOAD */
+
+/*
+ * Null processing lthread loop
+ *
+ * This lthread is used to start an empty scheduler on an lcore.
+ */
+static void
+lthread_null(__rte_unused void *args)
+{
+ int lcore_id = rte_lcore_id();
+
+ RTE_LOG(INFO, L3FWD, "Starting scheduler on lcore %d.\n", lcore_id);
+ lthread_exit(NULL);
+}
+
+/* main processing loop */
+static void
+lthread_tx_per_ring(void *dummy)
+{
+ int nb_rx;
+ uint8_t portid;
+ struct rte_ring *ring;
+ struct thread_tx_conf *tx_conf;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct lthread_cond *ready;
+
+ tx_conf = (struct thread_tx_conf *)dummy;
+ ring = tx_conf->ring;
+ ready = *tx_conf->ready;
+
+ lthread_set_data((void *)tx_conf);
+
+ /*
+ * Move this lthread to lcore
+ */
+ lthread_set_affinity(tx_conf->conf.lcore_id);
+
+ RTE_LOG(INFO, L3FWD, "entering main tx loop on lcore %u\n", rte_lcore_id());
+
+ nb_rx = 0;
+ rte_atomic16_inc(&tx_counter);
+ while (1) {
+
+ /*
+ * Read packet from ring
+ */
+ SET_CPU_BUSY(tx_conf, CPU_POLL);
+ nb_rx = rte_ring_sc_dequeue_burst(ring, (void **)pkts_burst,
+ MAX_PKT_BURST);
+ SET_CPU_IDLE(tx_conf, CPU_POLL);
+
+ if (nb_rx > 0) {
+ SET_CPU_BUSY(tx_conf, CPU_PROCESS);
+ portid = pkts_burst[0]->port;
+ process_burst(pkts_burst, nb_rx, portid);
+ SET_CPU_IDLE(tx_conf, CPU_PROCESS);
+ lthread_yield();
+ } else
+ lthread_cond_wait(ready, 0);
+
+ }
+}
+
+/*
+ * Main tx-lthreads spawner lthread.
+ *
+ * This lthread spawns one new lthread per ring connected to the producers.
+ *
+ */
+static void
+lthread_tx(void *args)
+{
+ struct lthread *lt;
+
+ unsigned lcore_id;
+ uint8_t portid;
+ struct thread_tx_conf *tx_conf;
+
+ tx_conf = (struct thread_tx_conf *)args;
+ lthread_set_data((void *)tx_conf);
+
+ /*
+ * Move this lthread to the selected lcore
+ */
+ lthread_set_affinity(tx_conf->conf.lcore_id);
+
+ /*
+ * Spawn tx readers (one per input ring)
+ */
+ lthread_create(&lt, tx_conf->conf.lcore_id, lthread_tx_per_ring,
+ (void *)tx_conf);
+
+ lcore_id = rte_lcore_id();
+
+ RTE_LOG(INFO, L3FWD, "Entering Tx main loop on lcore %u\n", lcore_id);
+
+ tx_conf->conf.cpu_id = sched_getcpu();
+ while (1) {
+
+ lthread_sleep(BURST_TX_DRAIN_US * 1000);
+
+ /*
+ * TX burst queue drain
+ */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (tx_conf->tx_mbufs[portid].len == 0)
+ continue;
+ SET_CPU_BUSY(tx_conf, CPU_PROCESS);
+ send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
+ SET_CPU_IDLE(tx_conf, CPU_PROCESS);
+ tx_conf->tx_mbufs[portid].len = 0;
+ }
+
+ }
+}
+
+static void
+lthread_rx(void *dummy)
+{
+ int ret;
+ uint16_t nb_rx;
+ int i;
+ uint8_t portid, queueid;
+ int worker_id;
+ int len[RTE_MAX_LCORE] = { 0 };
+ int old_len, new_len;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct thread_rx_conf *rx_conf;
+
+ rx_conf = (struct thread_rx_conf *)dummy;
+ lthread_set_data((void *)rx_conf);
+
+ /*
+ * Move this lthread to lcore
+ */
+ lthread_set_affinity(rx_conf->conf.lcore_id);
+
+ if (rx_conf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", rte_lcore_id());
+ return;
+ }
+
+ RTE_LOG(INFO, L3FWD, "Entering main Rx loop on lcore %u\n", rte_lcore_id());
+
+ for (i = 0; i < rx_conf->n_rx_queue; i++) {
+
+ portid = rx_conf->rx_queue_list[i].port_id;
+ queueid = rx_conf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ rte_lcore_id(), portid, queueid);
+ }
+
+ /*
+ * Init all condition variables (one per rx thread)
+ */
+ for (i = 0; i < rx_conf->n_rx_queue; i++)
+ lthread_cond_init(NULL, &rx_conf->ready[i], NULL);
+
+ worker_id = 0;
+
+ rx_conf->conf.cpu_id = sched_getcpu();
+ rte_atomic16_inc(&rx_counter);
+ while (1) {
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < rx_conf->n_rx_queue; ++i) {
+ portid = rx_conf->rx_queue_list[i].port_id;
+ queueid = rx_conf->rx_queue_list[i].queue_id;
+
+ SET_CPU_BUSY(rx_conf, CPU_POLL);
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+ MAX_PKT_BURST);
+ SET_CPU_IDLE(rx_conf, CPU_POLL);
+
+ if (nb_rx != 0) {
+ worker_id = (worker_id + 1) % rx_conf->n_ring;
+ old_len = len[worker_id];
+
+ SET_CPU_BUSY(rx_conf, CPU_PROCESS);
+ ret = rte_ring_sp_enqueue_burst(
+ rx_conf->ring[worker_id],
+ (void **) pkts_burst,
+ nb_rx);
+
+ new_len = old_len + ret;
+
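+				/*
+				 * Wake the tx lthread once at least BURST_SIZE
+				 * packets have accumulated since the last signal.
+				 */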
+ if (new_len >= BURST_SIZE) {
+ lthread_cond_signal(rx_conf->ready[worker_id]);
+ new_len = 0;
+ }
+
+ len[worker_id] = new_len;
+
+ if (unlikely(ret < nb_rx)) {
+ uint32_t k;
+
+ for (k = ret; k < nb_rx; k++) {
+ struct rte_mbuf *m = pkts_burst[k];
+
+ rte_pktmbuf_free(m);
+ }
+ }
+ SET_CPU_IDLE(rx_conf, CPU_PROCESS);
+ }
+
+ lthread_yield();
+ }
+ }
+}
+
+/*
+ * Start the scheduler with an initial lthread on the lcore.
+ *
+ * This lthread spawns all rx and tx lthreads on the master lcore.
+ */
+
+static void
+lthread_spawner(__rte_unused void *arg) {
+ struct lthread *lt[MAX_THREAD];
+ int i;
+ int n_thread = 0;
+
+ printf("Entering lthread_spawner\n");
+
+ /*
+ * Create producers (rx threads) on default lcore
+ */
+ for (i = 0; i < n_rx_thread; i++) {
+ rx_thread[i].conf.thread_id = i;
+ lthread_create(&lt[n_thread], -1, lthread_rx,
+ (void *)&rx_thread[i]);
+ n_thread++;
+ }
+
+ /*
+	 * Wait for all producers. Since some producers may be started on the same
+	 * scheduler as this lthread, yielding is required to let them run and to
+	 * prevent a deadlock here.
+ */
+ while (rte_atomic16_read(&rx_counter) < n_rx_thread)
+ lthread_sleep(100000);
+
+ /*
+ * Create consumers (tx threads) on default lcore_id
+ */
+ for (i = 0; i < n_tx_thread; i++) {
+ tx_thread[i].conf.thread_id = i;
+ lthread_create(&lt[n_thread], -1, lthread_tx,
+ (void *)&tx_thread[i]);
+ n_thread++;
+ }
+
+ /*
+	 * Wait for all threads to finish
+ */
+ for (i = 0; i < n_thread; i++)
+ lthread_join(lt[i], NULL);
+
+}
+
+/*
+ * Start master scheduler with initial lthread spawning rx and tx lthreads
+ * (main_lthread_master).
+ */
+static int
+lthread_master_spawner(__rte_unused void *arg) {
+ struct lthread *lt;
+ int lcore_id = rte_lcore_id();
+
+ RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
+ lthread_create(&lt, -1, lthread_spawner, NULL);
+ lthread_run();
+
+ return 0;
+}
+
+/*
+ * Start scheduler on lcore.
+ */
+static int
+sched_spawner(__rte_unused void *arg) {
+ struct lthread *lt;
+ int lcore_id = rte_lcore_id();
+
+#if (APP_CPU_LOAD)
+ if (lcore_id == cpu_load_lcore_id) {
+ cpu_load_collector(arg);
+ return 0;
+ }
+#endif /* APP_CPU_LOAD */
+
+ RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
+ lthread_create(&lt, -1, lthread_null, NULL);
+ lthread_run();
+
+ return 0;
+}
+
+/* main processing loop */
+static int
+pthread_tx(void *dummy)
+{
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ uint64_t prev_tsc, diff_tsc, cur_tsc;
+ int nb_rx;
+ uint8_t portid;
+ struct thread_tx_conf *tx_conf;
+
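+	/* Interval between TX drain passes, converted to TSC cycles. */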
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) /
+ US_PER_S * BURST_TX_DRAIN_US;
+
+ prev_tsc = 0;
+
+ tx_conf = (struct thread_tx_conf *)dummy;
+
+ RTE_LOG(INFO, L3FWD, "Entering main Tx loop on lcore %u\n", rte_lcore_id());
+
+ tx_conf->conf.cpu_id = sched_getcpu();
+ rte_atomic16_inc(&tx_counter);
+ while (1) {
+
+ cur_tsc = rte_rdtsc();
+
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ /*
+ * This could be optimized (use queueid instead of
+ * portid), but it is not called so often
+ */
+ SET_CPU_BUSY(tx_conf, CPU_PROCESS);
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ if (tx_conf->tx_mbufs[portid].len == 0)
+ continue;
+ send_burst(tx_conf, tx_conf->tx_mbufs[portid].len, portid);
+ tx_conf->tx_mbufs[portid].len = 0;
+ }
+ SET_CPU_IDLE(tx_conf, CPU_PROCESS);
+
+ prev_tsc = cur_tsc;
+ }
+
+ /*
+ * Read packet from ring
+ */
+ SET_CPU_BUSY(tx_conf, CPU_POLL);
+ nb_rx = rte_ring_sc_dequeue_burst(tx_conf->ring,
+ (void **)pkts_burst, MAX_PKT_BURST);
+ SET_CPU_IDLE(tx_conf, CPU_POLL);
+
+ if (unlikely(nb_rx == 0)) {
+ sched_yield();
+ continue;
+ }
+
+ SET_CPU_BUSY(tx_conf, CPU_PROCESS);
+ portid = pkts_burst[0]->port;
+ process_burst(pkts_burst, nb_rx, portid);
+ SET_CPU_IDLE(tx_conf, CPU_PROCESS);
+
+ }
+}
+
+static int
+pthread_rx(void *dummy)
+{
+ int i;
+ int worker_id;
+ uint32_t n;
+ uint32_t nb_rx;
+ unsigned lcore_id;
+ uint8_t portid, queueid;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+
+ struct thread_rx_conf *rx_conf;
+
+ lcore_id = rte_lcore_id();
+ rx_conf = (struct thread_rx_conf *)dummy;
+
+ if (rx_conf->n_rx_queue == 0) {
+ RTE_LOG(INFO, L3FWD, "lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ RTE_LOG(INFO, L3FWD, "entering main rx loop on lcore %u\n", lcore_id);
+
+ for (i = 0; i < rx_conf->n_rx_queue; i++) {
+
+ portid = rx_conf->rx_queue_list[i].port_id;
+ queueid = rx_conf->rx_queue_list[i].queue_id;
+ RTE_LOG(INFO, L3FWD, " -- lcoreid=%u portid=%hhu rxqueueid=%hhu\n",
+ lcore_id, portid, queueid);
+ }
+
+ worker_id = 0;
+ rx_conf->conf.cpu_id = sched_getcpu();
+ rte_atomic16_inc(&rx_counter);
+ while (1) {
+
+ /*
+ * Read packet from RX queues
+ */
+ for (i = 0; i < rx_conf->n_rx_queue; ++i) {
+ portid = rx_conf->rx_queue_list[i].port_id;
+ queueid = rx_conf->rx_queue_list[i].queue_id;
+
+ SET_CPU_BUSY(rx_conf, CPU_POLL);
+ nb_rx = rte_eth_rx_burst(portid, queueid, pkts_burst,
+ MAX_PKT_BURST);
+ SET_CPU_IDLE(rx_conf, CPU_POLL);
+
+ if (nb_rx == 0) {
+ sched_yield();
+ continue;
+ }
+
+ SET_CPU_BUSY(rx_conf, CPU_PROCESS);
+ worker_id = (worker_id + 1) % rx_conf->n_ring;
+ n = rte_ring_sp_enqueue_burst(rx_conf->ring[worker_id],
+ (void **)pkts_burst, nb_rx);
+
+ if (unlikely(n != nb_rx)) {
+ uint32_t k;
+
+ for (k = n; k < nb_rx; k++) {
+ struct rte_mbuf *m = pkts_burst[k];
+
+ rte_pktmbuf_free(m);
+ }
+ }
+
+ SET_CPU_IDLE(rx_conf, CPU_PROCESS);
+
+ }
+ }
+}
+
+/*
+ * P-Thread spawner.
+ */
+static int
+pthread_run(__rte_unused void *arg) {
+ int lcore_id = rte_lcore_id();
+ int i;
+
+ for (i = 0; i < n_rx_thread; i++)
+ if (rx_thread[i].conf.lcore_id == lcore_id) {
+ printf("Start rx thread on %d...\n", lcore_id);
+ RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
+ RTE_PER_LCORE(lcore_conf)->data = (void *)&rx_thread[i];
+ pthread_rx((void *)&rx_thread[i]);
+ return 0;
+ }
+
+ for (i = 0; i < n_tx_thread; i++)
+ if (tx_thread[i].conf.lcore_id == lcore_id) {
+ printf("Start tx thread on %d...\n", lcore_id);
+ RTE_PER_LCORE(lcore_conf) = &lcore_conf[lcore_id];
+ RTE_PER_LCORE(lcore_conf)->data = (void *)&tx_thread[i];
+ pthread_tx((void *)&tx_thread[i]);
+ return 0;
+ }
+
+#if (APP_CPU_LOAD)
+ if (lcore_id == cpu_load_lcore_id)
+ cpu_load_collector(arg);
+#endif /* APP_CPU_LOAD */
+
+ return 0;
+}
+
+static int
+check_lcore_params(void)
+{
+ uint8_t queue, lcore;
+ uint16_t i;
+ int socketid;
+
+ for (i = 0; i < nb_rx_thread_params; ++i) {
+ queue = rx_thread_params[i].queue_id;
+ if (queue >= MAX_RX_QUEUE_PER_PORT) {
+ printf("invalid queue number: %hhu\n", queue);
+ return -1;
+ }
+ lcore = rx_thread_params[i].lcore_id;
+ if (!rte_lcore_is_enabled(lcore)) {
+ printf("error: lcore %hhu is not enabled in lcore mask\n", lcore);
+ return -1;
+ }
+ socketid = rte_lcore_to_socket_id(lcore);
+ if ((socketid != 0) && (numa_on == 0))
+ printf("warning: lcore %hhu is on socket %d with numa off\n",
+ lcore, socketid);
+ }
+ return 0;
+}
+
+static int
+check_port_config(const unsigned nb_ports)
+{
+ unsigned portid;
+ uint16_t i;
+
+ for (i = 0; i < nb_rx_thread_params; ++i) {
+ portid = rx_thread_params[i].port_id;
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("port %u is not enabled in port mask\n", portid);
+ return -1;
+ }
+ if (portid >= nb_ports) {
+ printf("port %u is not present on the board\n", portid);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static uint8_t
+get_port_n_rx_queues(const uint8_t port)
+{
+ int queue = -1;
+ uint16_t i;
+
+ for (i = 0; i < nb_rx_thread_params; ++i)
+ if (rx_thread_params[i].port_id == port &&
+ rx_thread_params[i].queue_id > queue)
+ queue = rx_thread_params[i].queue_id;
+
+ return (uint8_t)(++queue);
+}
+
+static int
+init_rx_rings(void)
+{
+ unsigned socket_io;
+ struct thread_rx_conf *rx_conf;
+ struct thread_tx_conf *tx_conf;
+ unsigned rx_thread_id, tx_thread_id;
+ char name[256];
+ struct rte_ring *ring = NULL;
+
+ for (tx_thread_id = 0; tx_thread_id < n_tx_thread; tx_thread_id++) {
+
+ tx_conf = &tx_thread[tx_thread_id];
+
+ printf("Connecting tx-thread %d with rx-thread %d\n", tx_thread_id,
+ tx_conf->conf.thread_id);
+
+ rx_thread_id = tx_conf->conf.thread_id;
+		if (rx_thread_id >= n_rx_thread) {
+ printf("connection from tx-thread %u to rx-thread %u fails "
+ "(rx-thread not defined)\n", tx_thread_id, rx_thread_id);
+ return -1;
+ }
+
+ rx_conf = &rx_thread[rx_thread_id];
+ socket_io = rte_lcore_to_socket_id(rx_conf->conf.lcore_id);
+
+ snprintf(name, sizeof(name), "app_ring_s%u_rx%u_tx%u",
+ socket_io, rx_thread_id, tx_thread_id);
+
+ ring = rte_ring_create(name, 1024 * 4, socket_io,
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+
+ if (ring == NULL) {
+ rte_panic("Cannot create ring to connect rx-thread %u "
+ "with tx-thread %u\n", rx_thread_id, tx_thread_id);
+ }
+
+ rx_conf->ring[rx_conf->n_ring] = ring;
+
+ tx_conf->ring = ring;
+ tx_conf->ready = &rx_conf->ready[rx_conf->n_ring];
+
+ rx_conf->n_ring++;
+ }
+ return 0;
+}
+
+static int
+init_rx_queues(void)
+{
+ uint16_t i, nb_rx_queue;
+ uint8_t thread;
+
+ n_rx_thread = 0;
+
+ for (i = 0; i < nb_rx_thread_params; ++i) {
+ thread = rx_thread_params[i].thread_id;
+ nb_rx_queue = rx_thread[thread].n_rx_queue;
+
+ if (nb_rx_queue >= MAX_RX_QUEUE_PER_LCORE) {
+ printf("error: too many queues (%u) for thread: %u\n",
+ (unsigned)nb_rx_queue + 1, (unsigned)thread);
+ return -1;
+ }
+
+ rx_thread[thread].conf.thread_id = thread;
+ rx_thread[thread].conf.lcore_id = rx_thread_params[i].lcore_id;
+ rx_thread[thread].rx_queue_list[nb_rx_queue].port_id =
+ rx_thread_params[i].port_id;
+ rx_thread[thread].rx_queue_list[nb_rx_queue].queue_id =
+ rx_thread_params[i].queue_id;
+ rx_thread[thread].n_rx_queue++;
+
+ if (thread >= n_rx_thread)
+ n_rx_thread = thread + 1;
+
+ }
+ return 0;
+}
+
+static int
+init_tx_threads(void)
+{
+ int i;
+
+ n_tx_thread = 0;
+ for (i = 0; i < nb_tx_thread_params; ++i) {
+ tx_thread[n_tx_thread].conf.thread_id = tx_thread_params[i].thread_id;
+ tx_thread[n_tx_thread].conf.lcore_id = tx_thread_params[i].lcore_id;
+ n_tx_thread++;
+ }
+ return 0;
+}
+
+/* display usage */
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK -P"
+		" [--rx (port,queue,lcore,thread)[,(port,queue,lcore,thread)]]"
+		" [--tx (lcore,thread)[,(lcore,thread)]]"
+		" [--enable-jumbo [--max-pkt-len PKTLEN]]\n"
+		"  -p PORTMASK: hexadecimal bitmask of ports to configure\n"
+		"  -P : enable promiscuous mode\n"
+		"  --rx (port,queue,lcore,thread): rx queues configuration\n"
+		"  --tx (lcore,thread): tx threads configuration\n"
+		"  --stat-lcore LCORE: use lcore for stat collector\n"
+		"  --eth-dest=X,MM:MM:MM:MM:MM:MM: optional, ethernet destination for port X\n"
+		"  --no-numa: optional, disable numa awareness\n"
+		"  --ipv6: optional, enable processing of IPv6 packets\n"
+		"  --enable-jumbo: enable jumbo frames;"
+		" the maximum packet length PKTLEN is given in decimal (64-9600)\n"
+		"  --hash-entry-num: number of hash entries to set up, in hexadecimal\n"
+		"  --no-lthreads: turn off the lthread model\n",
+ prgname);
+}
+
+static int parse_max_pkt_len(const char *pktlen)
+{
+ char *end = NULL;
+ unsigned long len;
+
+ /* parse decimal string */
+ len = strtoul(pktlen, &end, 10);
+ if ((pktlen[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (len == 0)
+ return -1;
+
+ return len;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+static int
+parse_hash_entry_number(const char *hash_entry_num)
+{
+ char *end = NULL;
+ unsigned long hash_en;
+
+ /* parse hexadecimal string */
+ hash_en = strtoul(hash_entry_num, &end, 16);
+ if ((hash_entry_num[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (hash_en == 0)
+ return -1;
+
+ return hash_en;
+}
+#endif
+
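+/*
+ * Parse the --rx argument: a list of (port,queue,lcore,thread) tuples,
+ * e.g. --rx="(0,0,0,0),(1,0,1,1)" (illustrative values only).
+ */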
+static int
+parse_rx_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_PORT = 0,
+ FLD_QUEUE,
+ FLD_LCORE,
+ FLD_THREAD,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_rx_thread_params = 0;
+
+ while ((p = strchr(p0, '(')) != NULL) {
+ ++p;
+ p0 = strchr(p, ')');
+ if (p0 == NULL)
+ return -1;
+
+ size = p0 - p;
+ if (size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_rx_thread_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of rx params: %hu\n",
+ nb_rx_thread_params);
+ return -1;
+ }
+ rx_thread_params_array[nb_rx_thread_params].port_id =
+ (uint8_t)int_fld[FLD_PORT];
+ rx_thread_params_array[nb_rx_thread_params].queue_id =
+ (uint8_t)int_fld[FLD_QUEUE];
+ rx_thread_params_array[nb_rx_thread_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ rx_thread_params_array[nb_rx_thread_params].thread_id =
+ (uint8_t)int_fld[FLD_THREAD];
+ ++nb_rx_thread_params;
+ }
+ rx_thread_params = rx_thread_params_array;
+ return 0;
+}
+
+static int
+parse_tx_config(const char *q_arg)
+{
+ char s[256];
+ const char *p, *p0 = q_arg;
+ char *end;
+ enum fieldnames {
+ FLD_LCORE = 0,
+ FLD_THREAD,
+ _NUM_FLD
+ };
+ unsigned long int_fld[_NUM_FLD];
+ char *str_fld[_NUM_FLD];
+ int i;
+ unsigned size;
+
+ nb_tx_thread_params = 0;
+
+ while ((p = strchr(p0, '(')) != NULL) {
+ ++p;
+ p0 = strchr(p, ')');
+ if (p0 == NULL)
+ return -1;
+
+ size = p0 - p;
+ if (size >= sizeof(s))
+ return -1;
+
+ snprintf(s, sizeof(s), "%.*s", size, p);
+ if (rte_strsplit(s, sizeof(s), str_fld, _NUM_FLD, ',') != _NUM_FLD)
+ return -1;
+ for (i = 0; i < _NUM_FLD; i++) {
+ errno = 0;
+ int_fld[i] = strtoul(str_fld[i], &end, 0);
+ if (errno != 0 || end == str_fld[i] || int_fld[i] > 255)
+ return -1;
+ }
+ if (nb_tx_thread_params >= MAX_LCORE_PARAMS) {
+ printf("exceeded max number of tx params: %hu\n",
+ nb_tx_thread_params);
+ return -1;
+ }
+ tx_thread_params_array[nb_tx_thread_params].lcore_id =
+ (uint8_t)int_fld[FLD_LCORE];
+ tx_thread_params_array[nb_tx_thread_params].thread_id =
+ (uint8_t)int_fld[FLD_THREAD];
+ ++nb_tx_thread_params;
+ }
+ tx_thread_params = tx_thread_params_array;
+
+ return 0;
+}
+
+#if (APP_CPU_LOAD > 0)
+static int
+parse_stat_lcore(const char *stat_lcore)
+{
+ char *end = NULL;
+ unsigned long lcore_id;
+
+ lcore_id = strtoul(stat_lcore, &end, 10);
+ if ((stat_lcore[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ return lcore_id;
+}
+#endif
+
+static void
+parse_eth_dest(const char *optarg)
+{
+ uint8_t portid;
+ char *port_end;
+ uint8_t c, *dest, peer_addr[6];
+
+ errno = 0;
+ portid = strtoul(optarg, &port_end, 10);
+ if (errno != 0 || port_end == optarg || *port_end++ != ',')
+ rte_exit(EXIT_FAILURE,
+ "Invalid eth-dest: %s", optarg);
+ if (portid >= RTE_MAX_ETHPORTS)
+ rte_exit(EXIT_FAILURE,
+ "eth-dest: port %d >= RTE_MAX_ETHPORTS(%d)\n",
+ portid, RTE_MAX_ETHPORTS);
+
+ if (cmdline_parse_etheraddr(NULL, port_end,
+ &peer_addr, sizeof(peer_addr)) < 0)
+ rte_exit(EXIT_FAILURE,
+ "Invalid ethernet address: %s\n",
+ port_end);
+ dest = (uint8_t *)&dest_eth_addr[portid];
+ for (c = 0; c < 6; c++)
+ dest[c] = peer_addr[c];
+ *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
+}
+
+#define CMD_LINE_OPT_RX_CONFIG "rx"
+#define CMD_LINE_OPT_TX_CONFIG "tx"
+#define CMD_LINE_OPT_STAT_LCORE "stat-lcore"
+#define CMD_LINE_OPT_ETH_DEST "eth-dest"
+#define CMD_LINE_OPT_NO_NUMA "no-numa"
+#define CMD_LINE_OPT_IPV6 "ipv6"
+#define CMD_LINE_OPT_ENABLE_JUMBO "enable-jumbo"
+#define CMD_LINE_OPT_HASH_ENTRY_NUM "hash-entry-num"
+#define CMD_LINE_OPT_NO_LTHREADS "no-lthreads"
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {CMD_LINE_OPT_RX_CONFIG, 1, 0, 0},
+ {CMD_LINE_OPT_TX_CONFIG, 1, 0, 0},
+ {CMD_LINE_OPT_STAT_LCORE, 1, 0, 0},
+ {CMD_LINE_OPT_ETH_DEST, 1, 0, 0},
+ {CMD_LINE_OPT_NO_NUMA, 0, 0, 0},
+ {CMD_LINE_OPT_IPV6, 0, 0, 0},
+ {CMD_LINE_OPT_ENABLE_JUMBO, 0, 0, 0},
+ {CMD_LINE_OPT_HASH_ENTRY_NUM, 1, 0, 0},
+ {CMD_LINE_OPT_NO_LTHREADS, 0, 0, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:P",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ case 'P':
+ printf("Promiscuous mode selected\n");
+ promiscuous_on = 1;
+ break;
+
+ /* long options */
+ case 0:
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_RX_CONFIG,
+ sizeof(CMD_LINE_OPT_RX_CONFIG))) {
+ ret = parse_rx_config(optarg);
+ if (ret) {
+ printf("invalid rx-config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_TX_CONFIG,
+ sizeof(CMD_LINE_OPT_TX_CONFIG))) {
+ ret = parse_tx_config(optarg);
+ if (ret) {
+ printf("invalid tx-config\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+#if (APP_CPU_LOAD > 0)
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_STAT_LCORE,
+ sizeof(CMD_LINE_OPT_STAT_LCORE))) {
+ cpu_load_lcore_id = parse_stat_lcore(optarg);
+ }
+#endif
+
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ETH_DEST,
+ sizeof(CMD_LINE_OPT_ETH_DEST)))
+ parse_eth_dest(optarg);
+
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_NUMA,
+ sizeof(CMD_LINE_OPT_NO_NUMA))) {
+ printf("numa is disabled\n");
+ numa_on = 0;
+ }
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_IPV6,
+ sizeof(CMD_LINE_OPT_IPV6))) {
+ printf("ipv6 is specified\n");
+ ipv6 = 1;
+ }
+#endif
+
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_NO_LTHREADS,
+ sizeof(CMD_LINE_OPT_NO_LTHREADS))) {
+ printf("l-threads model is disabled\n");
+ lthreads_on = 0;
+ }
+
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_ENABLE_JUMBO,
+ sizeof(CMD_LINE_OPT_ENABLE_JUMBO))) {
+ struct option lenopts = {"max-pkt-len", required_argument, 0,
+ 0};
+
+ printf("jumbo frame is enabled - disabling simple TX path\n");
+ port_conf.rxmode.jumbo_frame = 1;
+
+ /* if no max-pkt-len set, use the default value ETHER_MAX_LEN */
+ if (0 == getopt_long(argc, argvopt, "", &lenopts,
+ &option_index)) {
+
+ ret = parse_max_pkt_len(optarg);
+ if ((ret < 64) || (ret > MAX_JUMBO_PKT_LEN)) {
+ printf("invalid packet length\n");
+ print_usage(prgname);
+ return -1;
+ }
+ port_conf.rxmode.max_rx_pkt_len = ret;
+ }
+ printf("set jumbo frame max packet length to %u\n",
+ (unsigned int)port_conf.rxmode.max_rx_pkt_len);
+ }
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+ if (!strncmp(lgopts[option_index].name, CMD_LINE_OPT_HASH_ENTRY_NUM,
+ sizeof(CMD_LINE_OPT_HASH_ENTRY_NUM))) {
+ ret = parse_hash_entry_number(optarg);
+ if ((ret > 0) && (ret <= L3FWD_HASH_ENTRIES)) {
+ hash_entry_number = ret;
+ } else {
+ printf("invalid hash entry number\n");
+ print_usage(prgname);
+ return -1;
+ }
+ }
+#endif
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind >= 0)
+ argv[optind-1] = prgname;
+
+ ret = optind-1;
+ optind = 0; /* reset getopt lib */
+ return ret;
+}
+
+static void
+print_ethaddr(const char *name, const struct ether_addr *eth_addr)
+{
+ char buf[ETHER_ADDR_FMT_SIZE];
+
+ ether_format_addr(buf, ETHER_ADDR_FMT_SIZE, eth_addr);
+ printf("%s%s", name, buf);
+}
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_EXACT_MATCH)
+
+static void convert_ipv4_5tuple(struct ipv4_5tuple *key1,
+ union ipv4_5tuple_host *key2)
+{
+ key2->ip_dst = rte_cpu_to_be_32(key1->ip_dst);
+ key2->ip_src = rte_cpu_to_be_32(key1->ip_src);
+ key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
+ key2->port_src = rte_cpu_to_be_16(key1->port_src);
+ key2->proto = key1->proto;
+ key2->pad0 = 0;
+ key2->pad1 = 0;
+}
+
+static void convert_ipv6_5tuple(struct ipv6_5tuple *key1,
+ union ipv6_5tuple_host *key2)
+{
+ uint32_t i;
+
+ for (i = 0; i < 16; i++) {
+ key2->ip_dst[i] = key1->ip_dst[i];
+ key2->ip_src[i] = key1->ip_src[i];
+ }
+ key2->port_dst = rte_cpu_to_be_16(key1->port_dst);
+ key2->port_src = rte_cpu_to_be_16(key1->port_src);
+ key2->proto = key1->proto;
+ key2->pad0 = 0;
+ key2->pad1 = 0;
+ key2->reserve = 0;
+}
+
+#define BYTE_VALUE_MAX 256
+#define ALL_32_BITS 0xffffffff
+#define BIT_8_TO_15 0x0000ff00
+static inline void
+populate_ipv4_few_flow_into_table(const struct rte_hash *h)
+{
+ uint32_t i;
+ int32_t ret;
+ uint32_t array_len = RTE_DIM(ipv4_l3fwd_route_array);
+
+ mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
+ for (i = 0; i < array_len; i++) {
+ struct ipv4_l3fwd_route entry;
+ union ipv4_5tuple_host newkey;
+
+ entry = ipv4_l3fwd_route_array[i];
+ convert_ipv4_5tuple(&entry.key, &newkey);
+ ret = rte_hash_add_key(h, (void *)&newkey);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
+ " to the l3fwd hash.\n", i);
+ }
+ ipv4_l3fwd_out_if[ret] = entry.if_out;
+ }
+ printf("Hash: Adding 0x%" PRIx32 " keys\n", array_len);
+}
+
+#define BIT_16_TO_23 0x00ff0000
+static inline void
+populate_ipv6_few_flow_into_table(const struct rte_hash *h)
+{
+ uint32_t i;
+ int32_t ret;
+ uint32_t array_len = RTE_DIM(ipv6_l3fwd_route_array);
+
+ mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
+ mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
+ for (i = 0; i < array_len; i++) {
+ struct ipv6_l3fwd_route entry;
+ union ipv6_5tuple_host newkey;
+
+ entry = ipv6_l3fwd_route_array[i];
+ convert_ipv6_5tuple(&entry.key, &newkey);
+ ret = rte_hash_add_key(h, (void *)&newkey);
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %" PRIu32
+ " to the l3fwd hash.\n", i);
+ }
+ ipv6_l3fwd_out_if[ret] = entry.if_out;
+ }
+ printf("Hash: Adding 0x%" PRIx32 "keys\n", array_len);
+}
+
+#define NUMBER_PORT_USED 4
+static inline void
+populate_ipv4_many_flow_into_table(const struct rte_hash *h,
+ unsigned int nr_flow)
+{
+ unsigned i;
+
+ mask0 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_8_TO_15);
+
+ for (i = 0; i < nr_flow; i++) {
+ struct ipv4_l3fwd_route entry;
+ union ipv4_5tuple_host newkey;
+ uint8_t a = (uint8_t)((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
+ uint8_t b = (uint8_t)(((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
+ BYTE_VALUE_MAX);
+ uint8_t c = (uint8_t)((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
+ BYTE_VALUE_MAX));
+ /* Create the ipv4 exact match flow */
+ memset(&entry, 0, sizeof(entry));
+ switch (i & (NUMBER_PORT_USED - 1)) {
+ case 0:
+ entry = ipv4_l3fwd_route_array[0];
+ entry.key.ip_dst = IPv4(101, c, b, a);
+ break;
+ case 1:
+ entry = ipv4_l3fwd_route_array[1];
+ entry.key.ip_dst = IPv4(201, c, b, a);
+ break;
+ case 2:
+ entry = ipv4_l3fwd_route_array[2];
+ entry.key.ip_dst = IPv4(111, c, b, a);
+ break;
+ case 3:
+ entry = ipv4_l3fwd_route_array[3];
+ entry.key.ip_dst = IPv4(211, c, b, a);
+ break;
+ };
+ convert_ipv4_5tuple(&entry.key, &newkey);
+ int32_t ret = rte_hash_add_key(h, (void *)&newkey);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
+
+ ipv4_l3fwd_out_if[ret] = (uint8_t)entry.if_out;
+
+ }
+ printf("Hash: Adding 0x%x keys\n", nr_flow);
+}
+
+static inline void
+populate_ipv6_many_flow_into_table(const struct rte_hash *h,
+ unsigned int nr_flow)
+{
+ unsigned i;
+
+ mask1 = _mm_set_epi32(ALL_32_BITS, ALL_32_BITS, ALL_32_BITS, BIT_16_TO_23);
+ mask2 = _mm_set_epi32(0, 0, ALL_32_BITS, ALL_32_BITS);
+ for (i = 0; i < nr_flow; i++) {
+ struct ipv6_l3fwd_route entry;
+ union ipv6_5tuple_host newkey;
+
+ uint8_t a = (uint8_t) ((i / NUMBER_PORT_USED) % BYTE_VALUE_MAX);
+ uint8_t b = (uint8_t) (((i / NUMBER_PORT_USED) / BYTE_VALUE_MAX) %
+ BYTE_VALUE_MAX);
+ uint8_t c = (uint8_t) ((i / NUMBER_PORT_USED) / (BYTE_VALUE_MAX *
+ BYTE_VALUE_MAX));
+
+ /* Create the ipv6 exact match flow */
+ memset(&entry, 0, sizeof(entry));
+ switch (i & (NUMBER_PORT_USED - 1)) {
+ case 0:
+ entry = ipv6_l3fwd_route_array[0];
+ break;
+ case 1:
+ entry = ipv6_l3fwd_route_array[1];
+ break;
+ case 2:
+ entry = ipv6_l3fwd_route_array[2];
+ break;
+ case 3:
+ entry = ipv6_l3fwd_route_array[3];
+ break;
+ };
+ entry.key.ip_dst[13] = c;
+ entry.key.ip_dst[14] = b;
+ entry.key.ip_dst[15] = a;
+ convert_ipv6_5tuple(&entry.key, &newkey);
+ int32_t ret = rte_hash_add_key(h, (void *)&newkey);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u\n", i);
+
+ ipv6_l3fwd_out_if[ret] = (uint8_t) entry.if_out;
+
+ }
+ printf("Hash: Adding 0x%x keys\n", nr_flow);
+}
+
+static void
+setup_hash(int socketid)
+{
+ struct rte_hash_parameters ipv4_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(union ipv4_5tuple_host),
+ .hash_func = ipv4_hash_crc,
+ .hash_func_init_val = 0,
+ };
+
+ struct rte_hash_parameters ipv6_l3fwd_hash_params = {
+ .name = NULL,
+ .entries = L3FWD_HASH_ENTRIES,
+ .key_len = sizeof(union ipv6_5tuple_host),
+ .hash_func = ipv6_hash_crc,
+ .hash_func_init_val = 0,
+ };
+
+ char s[64];
+
+ /* create ipv4 hash */
+ snprintf(s, sizeof(s), "ipv4_l3fwd_hash_%d", socketid);
+ ipv4_l3fwd_hash_params.name = s;
+ ipv4_l3fwd_hash_params.socket_id = socketid;
+ ipv4_l3fwd_lookup_struct[socketid] =
+ rte_hash_create(&ipv4_l3fwd_hash_params);
+ if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
+ "socket %d\n", socketid);
+
+ /* create ipv6 hash */
+ snprintf(s, sizeof(s), "ipv6_l3fwd_hash_%d", socketid);
+ ipv6_l3fwd_hash_params.name = s;
+ ipv6_l3fwd_hash_params.socket_id = socketid;
+ ipv6_l3fwd_lookup_struct[socketid] =
+ rte_hash_create(&ipv6_l3fwd_hash_params);
+ if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd hash on "
+ "socket %d\n", socketid);
+
+ if (hash_entry_number != HASH_ENTRY_NUMBER_DEFAULT) {
+ /* For testing hash matching with a large number of flows we
+ * generate millions of IP 5-tuples with an incremented dst
+ * address to initialize the hash table. */
+ if (ipv6 == 0) {
+ /* populate the ipv4 hash */
+ populate_ipv4_many_flow_into_table(
+ ipv4_l3fwd_lookup_struct[socketid], hash_entry_number);
+ } else {
+ /* populate the ipv6 hash */
+ populate_ipv6_many_flow_into_table(
+ ipv6_l3fwd_lookup_struct[socketid], hash_entry_number);
+ }
+ } else {
+ /* Use data in ipv4/ipv6 l3fwd lookup table directly to initialize
+ * the hash table */
+ if (ipv6 == 0) {
+ /* populate the ipv4 hash */
+ populate_ipv4_few_flow_into_table(
+ ipv4_l3fwd_lookup_struct[socketid]);
+ } else {
+ /* populate the ipv6 hash */
+ populate_ipv6_few_flow_into_table(
+ ipv6_l3fwd_lookup_struct[socketid]);
+ }
+ }
+}
+#endif
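+
+/*
+ * A minimal sketch of how the hash populated above is typically consulted on
+ * the forwarding path (field extraction and the mask0/mask1/mask2 handling
+ * are omitted; the key is assumed to be filled from the packet headers):
+ *
+ *	union ipv4_5tuple_host key;	// built from the received packet
+ *	int idx = rte_hash_lookup(ipv4_l3fwd_lookup_struct[socketid],
+ *			(const void *)&key);
+ *	uint8_t dst_port = (idx < 0) ? portid : ipv4_l3fwd_out_if[idx];
+ */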
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+static void
+setup_lpm(int socketid)
+{
+ struct rte_lpm6_config config;
+ struct rte_lpm_config lpm_ipv4_config;
+ unsigned i;
+ int ret;
+ char s[64];
+
+ /* create the LPM table */
+ snprintf(s, sizeof(s), "IPV4_L3FWD_LPM_%d", socketid);
+ lpm_ipv4_config.max_rules = IPV4_L3FWD_LPM_MAX_RULES;
+ lpm_ipv4_config.number_tbl8s = 256;
+ lpm_ipv4_config.flags = 0;
+ ipv4_l3fwd_lookup_struct[socketid] =
+ rte_lpm_create(s, socketid, &lpm_ipv4_config);
+ if (ipv4_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
+ " on socket %d\n", socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < IPV4_L3FWD_NUM_ROUTES; i++) {
+
+ /* skip unused ports */
+ if ((1 << ipv4_l3fwd_route_array[i].if_out &
+ enabled_port_mask) == 0)
+ continue;
+
+ ret = rte_lpm_add(ipv4_l3fwd_lookup_struct[socketid],
+ ipv4_l3fwd_route_array[i].ip,
+ ipv4_l3fwd_route_array[i].depth,
+ ipv4_l3fwd_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
+ "l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route 0x%08x / %d (%d)\n",
+ (unsigned)ipv4_l3fwd_route_array[i].ip,
+ ipv4_l3fwd_route_array[i].depth,
+ ipv4_l3fwd_route_array[i].if_out);
+ }
+
+ /* create the LPM6 table */
+ snprintf(s, sizeof(s), "IPV6_L3FWD_LPM_%d", socketid);
+
+ config.max_rules = IPV6_L3FWD_LPM_MAX_RULES;
+ config.number_tbl8s = IPV6_L3FWD_LPM_NUMBER_TBL8S;
+ config.flags = 0;
+ ipv6_l3fwd_lookup_struct[socketid] = rte_lpm6_create(s, socketid,
+ &config);
+ if (ipv6_l3fwd_lookup_struct[socketid] == NULL)
+ rte_exit(EXIT_FAILURE, "Unable to create the l3fwd LPM table"
+ " on socket %d\n", socketid);
+
+ /* populate the LPM table */
+ for (i = 0; i < IPV6_L3FWD_NUM_ROUTES; i++) {
+
+ /* skip unused ports */
+ if ((1 << ipv6_l3fwd_route_array[i].if_out &
+ enabled_port_mask) == 0)
+ continue;
+
+ ret = rte_lpm6_add(ipv6_l3fwd_lookup_struct[socketid],
+ ipv6_l3fwd_route_array[i].ip,
+ ipv6_l3fwd_route_array[i].depth,
+ ipv6_l3fwd_route_array[i].if_out);
+
+ if (ret < 0) {
+ rte_exit(EXIT_FAILURE, "Unable to add entry %u to the "
+ "l3fwd LPM table on socket %d\n",
+ i, socketid);
+ }
+
+ printf("LPM: Adding route %s / %d (%d)\n",
+ "IPV6",
+ ipv6_l3fwd_route_array[i].depth,
+ ipv6_l3fwd_route_array[i].if_out);
+ }
+}
+#endif
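+
+/*
+ * A minimal sketch of how the LPM table built above is typically used on the
+ * forwarding path (assuming the destination address has already been read
+ * from the IPv4 header):
+ *
+ *	uint32_t next_hop;
+ *	uint8_t dst_port = portid;
+ *
+ *	if (rte_lpm_lookup(ipv4_l3fwd_lookup_struct[socketid],
+ *			rte_be_to_cpu_32(ip_dst), &next_hop) == 0)
+ *		dst_port = (uint8_t)next_hop;
+ */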
+
+static int
+init_mem(unsigned nb_mbuf)
+{
+ struct lcore_conf *qconf;
+ int socketid;
+ unsigned lcore_id;
+ char s[64];
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ if (socketid >= NB_SOCKETS) {
+ rte_exit(EXIT_FAILURE, "Socket %d of lcore %u is out of range %d\n",
+ socketid, lcore_id, NB_SOCKETS);
+ }
+ if (pktmbuf_pool[socketid] == NULL) {
+ snprintf(s, sizeof(s), "mbuf_pool_%d", socketid);
+ pktmbuf_pool[socketid] =
+ rte_pktmbuf_pool_create(s, nb_mbuf,
+ MEMPOOL_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, socketid);
+ if (pktmbuf_pool[socketid] == NULL)
+ rte_exit(EXIT_FAILURE,
+ "Cannot init mbuf pool on socket %d\n", socketid);
+ else
+ printf("Allocated mbuf pool on socket %d\n", socketid);
+
+#if (APP_LOOKUP_METHOD == APP_LOOKUP_LPM)
+ setup_lpm(socketid);
+#else
+ setup_hash(socketid);
+#endif
+ }
+ qconf = &lcore_conf[lcore_id];
+ qconf->ipv4_lookup_struct = ipv4_l3fwd_lookup_struct[socketid];
+ qconf->ipv6_lookup_struct = ipv6_l3fwd_lookup_struct[socketid];
+ }
+ return 0;
+}
+
+/* Check the link status of all ports in up to 9s, and print the final status */
+static void
+check_all_ports_link_status(uint8_t port_num, uint32_t port_mask)
+{
+#define CHECK_INTERVAL 100 /* 100ms */
+#define MAX_CHECK_TIME 90 /* 9s (90 * 100ms) in total */
+ uint8_t portid, count, all_ports_up, print_flag = 0;
+ struct rte_eth_link link;
+
+ printf("\nChecking link status");
+ fflush(stdout);
+ for (count = 0; count <= MAX_CHECK_TIME; count++) {
+ all_ports_up = 1;
+ for (portid = 0; portid < port_num; portid++) {
+ if ((port_mask & (1 << portid)) == 0)
+ continue;
+ memset(&link, 0, sizeof(link));
+ rte_eth_link_get_nowait(portid, &link);
+ /* print link status if flag set */
+ if (print_flag == 1) {
+ if (link.link_status)
+ printf("Port %d Link Up - speed %u "
+ "Mbps - %s\n", (uint8_t)portid,
+ (unsigned)link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex\n"));
+ else
+ printf("Port %d Link Down\n",
+ (uint8_t)portid);
+ continue;
+ }
+ /* clear all_ports_up flag if any link down */
+ if (link.link_status == ETH_LINK_DOWN) {
+ all_ports_up = 0;
+ break;
+ }
+ }
+ /* after finally printing all link status, get out */
+ if (print_flag == 1)
+ break;
+
+ if (all_ports_up == 0) {
+ printf(".");
+ fflush(stdout);
+ rte_delay_ms(CHECK_INTERVAL);
+ }
+
+ /* set the print_flag if all ports up or timeout */
+ if (all_ports_up == 1 || count == (MAX_CHECK_TIME - 1)) {
+ print_flag = 1;
+ printf("done\n");
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_txconf *txconf;
+ int ret;
+ int i;
+ unsigned nb_ports;
+ uint16_t queueid;
+ unsigned lcore_id;
+ uint32_t n_tx_queue, nb_lcores;
+ uint8_t portid, nb_rx_queue, queue, socketid;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+
+ /* pre-init dst MACs for all ports to 02:00:00:00:00:xx */
+ for (portid = 0; portid < RTE_MAX_ETHPORTS; portid++) {
+ dest_eth_addr[portid] = ETHER_LOCAL_ADMIN_ADDR +
+ ((uint64_t)portid << 40);
+ *(uint64_t *)(val_eth + portid) = dest_eth_addr[portid];
+ }
+
+ /* parse application arguments (after the EAL ones) */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid L3FWD parameters\n");
+
+ if (check_lcore_params() < 0)
+ rte_exit(EXIT_FAILURE, "check_lcore_params failed\n");
+
+ printf("Initializing rx-queues...\n");
+ ret = init_rx_queues();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_rx_queues failed\n");
+
+ printf("Initializing tx-threads...\n");
+ ret = init_tx_threads();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_tx_threads failed\n");
+
+ printf("Initializing rings...\n");
+ ret = init_rx_rings();
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_rx_rings failed\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ if (check_port_config(nb_ports) < 0)
+ rte_exit(EXIT_FAILURE, "check_port_config failed\n");
+
+ nb_lcores = rte_lcore_count();
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+
+ /* init port */
+ printf("Initializing port %d ... ", portid);
+ fflush(stdout);
+
+ nb_rx_queue = get_port_n_rx_queues(portid);
+ n_tx_queue = nb_lcores;
+ if (n_tx_queue > MAX_TX_QUEUE_PER_PORT)
+ n_tx_queue = MAX_TX_QUEUE_PER_PORT;
+ printf("Creating queues: nb_rxq=%d nb_txq=%u... ",
+ nb_rx_queue, (unsigned)n_tx_queue);
+ ret = rte_eth_dev_configure(portid, nb_rx_queue,
+ (uint16_t)n_tx_queue, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: err=%d, port=%d\n",
+ ret, portid);
+
+ rte_eth_macaddr_get(portid, &ports_eth_addr[portid]);
+ print_ethaddr(" Address:", &ports_eth_addr[portid]);
+ printf(", ");
+ print_ethaddr("Destination:",
+ (const struct ether_addr *)&dest_eth_addr[portid]);
+ printf(", ");
+
+ /*
+ * prepare src MACs for each port.
+ */
+ ether_addr_copy(&ports_eth_addr[portid],
+ (struct ether_addr *)(val_eth + portid) + 1);
+
+ /* init memory */
+ ret = init_mem(NB_MBUF);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "init_mem failed\n");
+
+		/* init one TX queue per (lcore, port) pair */
+ queueid = 0;
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) == 0)
+ continue;
+
+ if (numa_on)
+ socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("txq=%u,%d,%d ", lcore_id, queueid, socketid);
+ fflush(stdout);
+
+ rte_eth_dev_info_get(portid, &dev_info);
+ txconf = &dev_info.default_txconf;
+ if (port_conf.rxmode.jumbo_frame)
+ txconf->txq_flags = 0;
+ ret = rte_eth_tx_queue_setup(portid, queueid, nb_txd,
+ socketid, txconf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+
+ tx_thread[lcore_id].tx_queue_id[portid] = queueid;
+ queueid++;
+ }
+ printf("\n");
+ }
+
+ for (i = 0; i < n_rx_thread; i++) {
+ lcore_id = rx_thread[i].conf.lcore_id;
+
+ if (rte_lcore_is_enabled(lcore_id) == 0) {
+ rte_exit(EXIT_FAILURE,
+ "Cannot start Rx thread on lcore %u: lcore disabled\n",
+ lcore_id
+ );
+ }
+
+ printf("\nInitializing rx queues for Rx thread %d on lcore %u ... ",
+ i, lcore_id);
+ fflush(stdout);
+
+ /* init RX queues */
+ for (queue = 0; queue < rx_thread[i].n_rx_queue; ++queue) {
+ portid = rx_thread[i].rx_queue_list[queue].port_id;
+ queueid = rx_thread[i].rx_queue_list[queue].queue_id;
+
+ if (numa_on)
+ socketid = (uint8_t)rte_lcore_to_socket_id(lcore_id);
+ else
+ socketid = 0;
+
+ printf("rxq=%d,%d,%d ", portid, queueid, socketid);
+ fflush(stdout);
+
+ ret = rte_eth_rx_queue_setup(portid, queueid, nb_rxd,
+ socketid,
+ NULL,
+ pktmbuf_pool[socketid]);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: err=%d, "
+ "port=%d\n", ret, portid);
+ }
+ }
+
+ printf("\n");
+
+ /* start ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((enabled_port_mask & (1 << portid)) == 0)
+ continue;
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: err=%d, port=%d\n",
+ ret, portid);
+
+ /*
+ * If enabled, put device in promiscuous mode.
+ * This allows IO forwarding mode to forward packets
+ * to itself through 2 cross-connected ports of the
+ * target machine.
+ */
+ if (promiscuous_on)
+ rte_eth_promiscuous_enable(portid);
+ }
+
+ check_all_ports_link_status((uint8_t)nb_ports, enabled_port_mask);
+
+ if (lthreads_on) {
+ printf("Starting L-Threading Model\n");
+
+#if (APP_CPU_LOAD > 0)
+ if (cpu_load_lcore_id > 0)
+ /* Use one lcore for cpu load collector */
+ nb_lcores--;
+#endif
+
+ lthread_num_schedulers_set(nb_lcores);
+ rte_eal_mp_remote_launch(sched_spawner, NULL, SKIP_MASTER);
+ lthread_master_spawner(NULL);
+
+ } else {
+ printf("Starting P-Threading Model\n");
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(pthread_run, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/performance-thread/l3fwd-thread/test.sh b/examples/performance-thread/l3fwd-thread/test.sh
new file mode 100755
index 00000000..b7718b62
--- /dev/null
+++ b/examples/performance-thread/l3fwd-thread/test.sh
@@ -0,0 +1,149 @@
+#!/bin/bash
+
+case "$1" in
+
+ ######################
+ # 1 L-core per pcore #
+ ######################
+
+ "1.1")
+ echo "1.1 1 L-core per pcore (N=2)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,0,0)" \
+ --tx="(1,0)" \
+ --stat-lcore 2 \
+ --no-lthread
+
+ ;;
+
+ "1.2")
+ echo "1.2 1 L-core per pcore (N=4)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,1,1)" \
+ --tx="(2,0)(3,1)" \
+ --stat-lcore 4 \
+ --no-lthread
+ ;;
+
+ "1.3")
+ echo "1.3 1 L-core per pcore (N=8)"
+
+ ./build/l3fwd-thread -c 1ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,1,1)(1,0,2,2)(1,1,3,3)" \
+ --tx="(4,0)(5,1)(6,2)(7,3)" \
+ --stat-lcore 8 \
+ --no-lthread
+ ;;
+
+ "1.4")
+ echo "1.3 1 L-core per pcore (N=16)"
+
+ ./build/l3fwd-thread -c 3ffff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,1,1)(0,2,2,2)(0,3,3,3)(1,0,4,4)(1,1,5,5)(1,2,6,6)(1,3,7,7)" \
+ --tx="(8,0)(9,1)(10,2)(11,3)(12,4)(13,5)(14,6)(15,7)" \
+ --stat-lcore 16 \
+ --no-lthread
+ ;;
+
+
+ ######################
+ # N L-core per pcore #
+ ######################
+
+ "2.1")
+ echo "2.1 N L-core per pcore (N=2)"
+
+ ./build/l3fwd-thread -c ff -n 2 --lcores="2,(0-1)@0" -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,0,0)" \
+ --tx="(1,0)" \
+ --stat-lcore 2 \
+ --no-lthread
+
+ ;;
+
+ "2.2")
+ echo "2.2 N L-core per pcore (N=4)"
+
+ ./build/l3fwd-thread -c ff -n 2 --lcores="(0-3)@0,4" -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,1,1)" \
+ --tx="(2,0)(3,1)" \
+ --stat-lcore 4 \
+ --no-lthread
+ ;;
+
+ "2.3")
+ echo "2.3 N L-core per pcore (N=8)"
+
+ ./build/l3fwd-thread -c 3ffff -n 2 --lcores="(0-7)@0,8" -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,1,1)(1,0,2,2)(1,1,3,3)" \
+ --tx="(4,0)(5,1)(6,2)(7,3)" \
+ --stat-lcore 8 \
+ --no-lthread
+ ;;
+
+ "2.4")
+ echo "2.3 N L-core per pcore (N=16)"
+
+ ./build/l3fwd-thread -c 3ffff -n 2 --lcores="(0-15)@0,16" -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,1,1)(0,2,2,2)(0,3,3,3)(1,0,4,4)(1,1,5,5)(1,2,6,6)(1,3,7,7)" \
+ --tx="(8,0)(9,1)(10,2)(11,3)(12,4)(13,5)(14,6)(15,7)" \
+ --stat-lcore 16 \
+ --no-lthread
+ ;;
+
+
+ #########################
+ # N L-threads per pcore #
+ #########################
+
+ "3.1")
+ echo "3.1 N L-threads per pcore (N=2)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,0,0)" \
+ --tx="(0,0)" \
+ --stat-lcore 1
+ ;;
+
+ "3.2")
+ echo "3.2 N L-threads per pcore (N=4)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(1,0,0,1)" \
+ --tx="(0,0)(0,1)" \
+ --stat-lcore 1
+ ;;
+
+ "3.3")
+ echo "3.2 N L-threads per pcore (N=8)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,0,1)(1,0,0,2)(1,1,0,3)" \
+ --tx="(0,0)(0,1)(0,2)(0,3)" \
+ --stat-lcore 1
+ ;;
+
+ "3.4")
+ echo "3.2 N L-threads per pcore (N=16)"
+
+ ./build/l3fwd-thread -c ff -n 2 -- -P -p 3 \
+ --enable-jumbo --max-pkt-len 1500 \
+ --rx="(0,0,0,0)(0,1,0,1)(0,2,0,2)(0,0,0,3)(1,0,0,4)(1,1,0,5)(1,2,0,6)(1,3,0,7)" \
+ --tx="(0,0)(0,1)(0,2)(0,3)(0,4)(0,5)(0,6)(0,7)" \
+ --stat-lcore 1
+ ;;
+
+esac
diff --git a/examples/performance-thread/pthread_shim/Makefile b/examples/performance-thread/pthread_shim/Makefile
new file mode 100644
index 00000000..86ac657c
--- /dev/null
+++ b/examples/performance-thread/pthread_shim/Makefile
@@ -0,0 +1,60 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = lthread_pthread_shim
+
+# all source are stored in SRCS-y
+SRCS-y := main.c pthread_shim.c
+INCLUDES := -I$(RTE_SDK)/$(RTE_TARGET)/include -I$(SRCDIR)
+include $(RTE_SDK)/examples/performance-thread/common/common.mk
+
+CFLAGS += -g -O3 $(USER_FLAGS) $(INCLUDES)
+CFLAGS += $(WERROR_FLAGS)
+
+LDFLAGS += -lpthread
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/performance-thread/pthread_shim/main.c b/examples/performance-thread/pthread_shim/main.c
new file mode 100644
index 00000000..f0357218
--- /dev/null
+++ b/examples/performance-thread/pthread_shim/main.c
@@ -0,0 +1,287 @@
+
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <string.h>
+#include <sys/queue.h>
+#include <stdarg.h>
+#include <errno.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sched.h>
+#include <pthread.h>
+
+#include <rte_common.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_timer.h>
+
+#include "lthread_api.h"
+#include "lthread_diag_api.h"
+#include "pthread_shim.h"
+
+#define DEBUG_APP 0
+#define HELLOW_WORLD_MAX_LTHREADS 10
+
+__thread int print_count;
+__thread pthread_mutex_t print_lock;
+
+__thread pthread_mutex_t exit_lock;
+__thread pthread_cond_t exit_cond;
+
+/*
+ * A simple thread that demonstrates use of a mutex, a condition
+ * variable, thread local storage, explicit yield, and thread exit.
+ *
+ * The thread uses a mutex to protect a shared counter which is incremented
+ * and then it waits on a condition variable before exiting.
+ *
+ * The thread argument is stored in and retrieved from TLS, using
+ * the pthread key create, get and set specific APIs.
+ *
+ * The thread yields while holding the mutex, to provide opportunity
+ * for other threads to contend.
+ *
+ * All of the pthread API functions used by this thread are actually
+ * resolved to corresponding lthread functions by the pthread shim
+ * implemented in pthread_shim.c
+ */
+void *helloworld_pthread(void *arg);
+void *helloworld_pthread(void *arg)
+{
+ pthread_key_t key;
+
+ /* create a key for TLS */
+ pthread_key_create(&key, NULL);
+
+ /* store the arg in TLS */
+ pthread_setspecific(key, arg);
+
+ /* grab lock and increment shared counter */
+ pthread_mutex_lock(&print_lock);
+ print_count++;
+
+ /* yield thread to give opportunity for lock contention */
+ pthread_yield();
+
+ /* retrieve arg from TLS */
+ uint64_t thread_no = (uint64_t) pthread_getspecific(key);
+
+ printf("Hello - lcore = %d count = %d thread_no = %d thread_id = %p\n",
+ sched_getcpu(),
+ print_count,
+ (int) thread_no,
+ (void *)pthread_self());
+
+ /* release the lock */
+ pthread_mutex_unlock(&print_lock);
+
+ /*
+ * wait on condition variable
+ * before exiting
+ */
+ pthread_mutex_lock(&exit_lock);
+ pthread_cond_wait(&exit_cond, &exit_lock);
+ pthread_mutex_unlock(&exit_lock);
+
+ /* exit */
+ pthread_exit((void *) thread_no);
+}
+
+
+/*
+ * This is the initial thread
+ *
+ * It demonstrates pthread, mutex and condition variable creation,
+ * broadcast and pthread join APIs.
+ *
+ * This initial thread must always start life as an lthread.
+ *
+ * This thread creates many more threads then waits a short time
+ * before signalling them to exit using a broadcast.
+ *
+ * All of the pthread API functions used by this thread are actually
+ * resolved to corresponding lthread functions by the pthread shim
+ * implemented in pthread_shim.c
+ *
+ * After all threads have finished the lthread scheduler is shutdown
+ * and normal pthread operation is restored
+ */
+__thread pthread_t tid[HELLOW_WORLD_MAX_LTHREADS];
+
+static void initial_lthread(void *args);
+static void initial_lthread(void *args __attribute__((unused)))
+{
+ int lcore = (int) rte_lcore_id();
+ /*
+ *
+ * We can now enable pthread API override
+ * and start to use the pthread APIs
+ */
+ pthread_override_set(1);
+
+ uint64_t i;
+
+ /* initialize mutex for shared counter */
+ print_count = 0;
+ pthread_mutex_init(&print_lock, NULL);
+
+ /* initialize mutex and condition variable controlling thread exit */
+ pthread_mutex_init(&exit_lock, NULL);
+ pthread_cond_init(&exit_cond, NULL);
+
+ /* spawn a number of threads */
+ for (i = 0; i < HELLOW_WORLD_MAX_LTHREADS; i++) {
+
+ /*
+ * Not strictly necessary but
+ * for the sake of this example
+ * use an attribute to pass the desired lcore
+ */
+ pthread_attr_t attr;
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(lcore, &cpuset);
+ pthread_attr_init(&attr);
+ pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
+
+ /* create the thread */
+ pthread_create(&tid[i], &attr, helloworld_pthread, (void *) i);
+ }
+
+ /* wait for 1s to allow threads
+ * to block on the condition variable
+ * N.B. nanosleep() is resolved to lthread_sleep()
+ * by the shim.
+ */
+ struct timespec time;
+
+ time.tv_sec = 1;
+ time.tv_nsec = 0;
+ nanosleep(&time, NULL);
+
+ /* wake up all the threads */
+ pthread_cond_broadcast(&exit_cond);
+
+ /* wait for them to finish */
+ for (i = 0; i < HELLOW_WORLD_MAX_LTHREADS; i++) {
+
+ uint64_t thread_no;
+
+ pthread_join(tid[i], (void *) &thread_no);
+ if (thread_no != i)
+ printf("error on thread exit\n");
+ }
+
+ pthread_cond_destroy(&exit_cond);
+ pthread_mutex_destroy(&print_lock);
+ pthread_mutex_destroy(&exit_lock);
+
+ /* shutdown the lthread scheduler */
+ lthread_scheduler_shutdown(rte_lcore_id());
+ lthread_detach();
+}
+
+
+
+/* This thread creates a single initial lthread
+ * and then runs the scheduler.
+ * An instance of this thread is created on each lcore
+ * in the core mask.
+ */
+static int
+lthread_scheduler(void *args);
+static int
+lthread_scheduler(void *args __attribute__((unused)))
+{
+ /* create initial thread */
+ struct lthread *lt;
+
+ lthread_create(&lt, -1, initial_lthread, (void *) NULL);
+
+ /* run the lthread scheduler */
+ lthread_run();
+
+ /* restore genuine pthread operation */
+ pthread_override_set(0);
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int num_sched = 0;
+
+	/* basic DPDK initialization is all that is necessary to run lthreads */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+
+ /* enable timer subsystem */
+ rte_timer_subsystem_init();
+
+#if DEBUG_APP
+ lthread_diagnostic_set_mask(LT_DIAG_ALL);
+#endif
+
+ /* create a scheduler on every core in the core mask
+ * and launch an initial lthread that will spawn many more.
+ */
+ unsigned lcore_id;
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id))
+ num_sched++;
+ }
+
+	/* set the number of schedulers; this forces all schedulers to synchronize
+ * before entering their main loop
+ */
+ lthread_num_schedulers_set(num_sched);
+
+ /* launch all threads */
+ rte_eal_mp_remote_launch(lthread_scheduler, (void *)NULL, CALL_MASTER);
+
+ /* wait for threads to stop */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_wait_lcore(lcore_id);
+ }
+ return 0;
+}
diff --git a/examples/performance-thread/pthread_shim/pthread_shim.c b/examples/performance-thread/pthread_shim/pthread_shim.c
new file mode 100644
index 00000000..93c3216d
--- /dev/null
+++ b/examples/performance-thread/pthread_shim/pthread_shim.c
@@ -0,0 +1,719 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <errno.h>
+#define __USE_GNU
+#include <sched.h>
+#include <dlfcn.h>
+
+#include <rte_log.h>
+
+#include "lthread_api.h"
+#include "pthread_shim.h"
+
+#define RTE_LOGTYPE_PTHREAD_SHIM RTE_LOGTYPE_USER3
+
+#define POSIX_ERRNO(x) (x)
+
+/*
+ * this flag determines at run time whether we override pthread
+ * calls and map them to equivalent lthread calls,
+ * or whether we call the standard pthread functions
+ */
+static __thread int override;
+
+
+/*
+ * this structure contains function pointers that will be
+ * initialised to the loaded address of the real
+ * pthread library API functions
+ */
+struct pthread_lib_funcs {
+int (*f_pthread_barrier_destroy)
+ (pthread_barrier_t *);
+int (*f_pthread_barrier_init)
+ (pthread_barrier_t *, const pthread_barrierattr_t *, unsigned);
+int (*f_pthread_barrier_wait)
+ (pthread_barrier_t *);
+int (*f_pthread_cond_broadcast)
+ (pthread_cond_t *);
+int (*f_pthread_cond_destroy)
+ (pthread_cond_t *);
+int (*f_pthread_cond_init)
+ (pthread_cond_t *, const pthread_condattr_t *);
+int (*f_pthread_cond_signal)
+ (pthread_cond_t *);
+int (*f_pthread_cond_timedwait)
+ (pthread_cond_t *, pthread_mutex_t *, const struct timespec *);
+int (*f_pthread_cond_wait)
+ (pthread_cond_t *, pthread_mutex_t *);
+int (*f_pthread_create)
+ (pthread_t *, const pthread_attr_t *, void *(*)(void *), void *);
+int (*f_pthread_detach)
+ (pthread_t);
+int (*f_pthread_equal)
+ (pthread_t, pthread_t);
+void (*f_pthread_exit)
+ (void *);
+void * (*f_pthread_getspecific)
+ (pthread_key_t);
+int (*f_pthread_getcpuclockid)
+ (pthread_t, clockid_t *);
+int (*f_pthread_join)
+ (pthread_t, void **);
+int (*f_pthread_key_create)
+ (pthread_key_t *, void (*) (void *));
+int (*f_pthread_key_delete)
+ (pthread_key_t);
+int (*f_pthread_mutex_destroy)
+ (pthread_mutex_t *__mutex);
+int (*f_pthread_mutex_init)
+ (pthread_mutex_t *__mutex, const pthread_mutexattr_t *);
+int (*f_pthread_mutex_lock)
+ (pthread_mutex_t *__mutex);
+int (*f_pthread_mutex_trylock)
+ (pthread_mutex_t *__mutex);
+int (*f_pthread_mutex_timedlock)
+ (pthread_mutex_t *__mutex, const struct timespec *);
+int (*f_pthread_mutex_unlock)
+ (pthread_mutex_t *__mutex);
+int (*f_pthread_once)
+ (pthread_once_t *, void (*) (void));
+int (*f_pthread_rwlock_destroy)
+ (pthread_rwlock_t *__rwlock);
+int (*f_pthread_rwlock_init)
+ (pthread_rwlock_t *__rwlock, const pthread_rwlockattr_t *);
+int (*f_pthread_rwlock_rdlock)
+ (pthread_rwlock_t *__rwlock);
+int (*f_pthread_rwlock_timedrdlock)
+ (pthread_rwlock_t *__rwlock, const struct timespec *);
+int (*f_pthread_rwlock_timedwrlock)
+ (pthread_rwlock_t *__rwlock, const struct timespec *);
+int (*f_pthread_rwlock_tryrdlock)
+ (pthread_rwlock_t *__rwlock);
+int (*f_pthread_rwlock_trywrlock)
+ (pthread_rwlock_t *__rwlock);
+int (*f_pthread_rwlock_unlock)
+ (pthread_rwlock_t *__rwlock);
+int (*f_pthread_rwlock_wrlock)
+ (pthread_rwlock_t *__rwlock);
+pthread_t (*f_pthread_self)
+ (void);
+int (*f_pthread_setspecific)
+ (pthread_key_t, const void *);
+int (*f_pthread_spin_init)
+ (pthread_spinlock_t *__spin, int);
+int (*f_pthread_spin_destroy)
+ (pthread_spinlock_t *__spin);
+int (*f_pthread_spin_lock)
+ (pthread_spinlock_t *__spin);
+int (*f_pthread_spin_trylock)
+ (pthread_spinlock_t *__spin);
+int (*f_pthread_spin_unlock)
+ (pthread_spinlock_t *__spin);
+int (*f_pthread_cancel)
+ (pthread_t);
+int (*f_pthread_setcancelstate)
+ (int, int *);
+int (*f_pthread_setcanceltype)
+ (int, int *);
+void (*f_pthread_testcancel)
+ (void);
+int (*f_pthread_getschedparam)
+ (pthread_t pthread, int *, struct sched_param *);
+int (*f_pthread_setschedparam)
+ (pthread_t, int, const struct sched_param *);
+int (*f_pthread_yield)
+ (void);
+int (*f_pthread_setaffinity_np)
+ (pthread_t thread, size_t cpusetsize, const cpu_set_t *cpuset);
+int (*f_nanosleep)
+ (const struct timespec *req, struct timespec *rem);
+} _sys_pthread_funcs = {
+ .f_pthread_barrier_destroy = NULL,
+};
+
+
+/*
+ * this macro obtains the loaded address of a library function
+ * and saves it.
+ */
+static void *__libc_dl_handle = RTLD_NEXT;
+
+#define get_addr_of_loaded_symbol(name) do { \
+ char *error_str; \
+ _sys_pthread_funcs.f_##name = dlsym(__libc_dl_handle, (#name)); \
+ error_str = dlerror(); \
+ if (error_str != NULL) { \
+ fprintf(stderr, "%s\n", error_str); \
+ } \
+} while (0)
+
+
+/*
+ * The constructor function initialises the
+ * function pointers for pthread library functions
+ */
+void
+pthread_intercept_ctor(void)__attribute__((constructor));
+void
+pthread_intercept_ctor(void)
+{
+ override = 0;
+ /*
+ * Get the original functions
+ */
+ get_addr_of_loaded_symbol(pthread_barrier_destroy);
+ get_addr_of_loaded_symbol(pthread_barrier_init);
+ get_addr_of_loaded_symbol(pthread_barrier_wait);
+ get_addr_of_loaded_symbol(pthread_cond_broadcast);
+ get_addr_of_loaded_symbol(pthread_cond_destroy);
+ get_addr_of_loaded_symbol(pthread_cond_init);
+ get_addr_of_loaded_symbol(pthread_cond_signal);
+ get_addr_of_loaded_symbol(pthread_cond_timedwait);
+ get_addr_of_loaded_symbol(pthread_cond_wait);
+ get_addr_of_loaded_symbol(pthread_create);
+ get_addr_of_loaded_symbol(pthread_detach);
+ get_addr_of_loaded_symbol(pthread_equal);
+ get_addr_of_loaded_symbol(pthread_exit);
+ get_addr_of_loaded_symbol(pthread_getspecific);
+ get_addr_of_loaded_symbol(pthread_getcpuclockid);
+ get_addr_of_loaded_symbol(pthread_join);
+ get_addr_of_loaded_symbol(pthread_key_create);
+ get_addr_of_loaded_symbol(pthread_key_delete);
+ get_addr_of_loaded_symbol(pthread_mutex_destroy);
+ get_addr_of_loaded_symbol(pthread_mutex_init);
+ get_addr_of_loaded_symbol(pthread_mutex_lock);
+ get_addr_of_loaded_symbol(pthread_mutex_trylock);
+ get_addr_of_loaded_symbol(pthread_mutex_timedlock);
+ get_addr_of_loaded_symbol(pthread_mutex_unlock);
+ get_addr_of_loaded_symbol(pthread_once);
+ get_addr_of_loaded_symbol(pthread_rwlock_destroy);
+ get_addr_of_loaded_symbol(pthread_rwlock_init);
+ get_addr_of_loaded_symbol(pthread_rwlock_rdlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_timedrdlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_timedwrlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_tryrdlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_trywrlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_unlock);
+ get_addr_of_loaded_symbol(pthread_rwlock_wrlock);
+ get_addr_of_loaded_symbol(pthread_self);
+ get_addr_of_loaded_symbol(pthread_setspecific);
+ get_addr_of_loaded_symbol(pthread_spin_init);
+ get_addr_of_loaded_symbol(pthread_spin_destroy);
+ get_addr_of_loaded_symbol(pthread_spin_lock);
+ get_addr_of_loaded_symbol(pthread_spin_trylock);
+ get_addr_of_loaded_symbol(pthread_spin_unlock);
+ get_addr_of_loaded_symbol(pthread_cancel);
+ get_addr_of_loaded_symbol(pthread_setcancelstate);
+ get_addr_of_loaded_symbol(pthread_setcanceltype);
+ get_addr_of_loaded_symbol(pthread_testcancel);
+ get_addr_of_loaded_symbol(pthread_getschedparam);
+ get_addr_of_loaded_symbol(pthread_setschedparam);
+ get_addr_of_loaded_symbol(pthread_yield);
+ get_addr_of_loaded_symbol(pthread_setaffinity_np);
+ get_addr_of_loaded_symbol(nanosleep);
+}
+
+
+/*
+ * Enable/Disable pthread override
+ * state
+ * 0 disable
+ * 1 enable
+ */
+void pthread_override_set(int state)
+{
+ override = state;
+}
+
+
+/*
+ * Return pthread override state
+ * return
+ * 0 disable
+ * 1 enable
+ */
+int pthread_override_get(void)
+{
+ return override;
+}
+
+/*
+ * This macro is used to catch and log
+ * invocation of stubs for unimplemented pthread
+ * API functions.
+ */
+#define NOT_IMPLEMENTED do { \
+ if (override) { \
+ RTE_LOG(WARNING, \
+ PTHREAD_SHIM, \
+ "WARNING %s NOT IMPLEMENTED\n", \
+ __func__); \
+ } \
+} while (0)
+
+/*
+ * pthread API override functions follow
+ * Note in this example code only a subset of functions are
+ * implemented.
+ *
+ * The stub functions provided will issue a warning log
+ * message if an unimplemented function is invoked
+ *
+ */
+
+int pthread_barrier_destroy(pthread_barrier_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_barrier_destroy(a);
+}
+
+int
+pthread_barrier_init(pthread_barrier_t *a,
+ const pthread_barrierattr_t *b, unsigned c)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_barrier_init(a, b, c);
+}
+
+int pthread_barrier_wait(pthread_barrier_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_barrier_wait(a);
+}
+
+int pthread_cond_broadcast(pthread_cond_t *cond)
+{
+ if (override) {
+
+ lthread_cond_broadcast(*(struct lthread_cond **)cond);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_cond_broadcast(cond);
+}
+
+int pthread_mutex_destroy(pthread_mutex_t *mutex)
+{
+ if (override)
+ return lthread_mutex_destroy(*(struct lthread_mutex **)mutex);
+ return _sys_pthread_funcs.f_pthread_mutex_destroy(mutex);
+}
+
+int pthread_cond_destroy(pthread_cond_t *cond)
+{
+ if (override)
+ return lthread_cond_destroy(*(struct lthread_cond **)cond);
+ return _sys_pthread_funcs.f_pthread_cond_destroy(cond);
+}
+
+int pthread_cond_init(pthread_cond_t *cond, const pthread_condattr_t *attr)
+{
+ if (override)
+ return lthread_cond_init(NULL,
+ (struct lthread_cond **)cond,
+ (const struct lthread_condattr *) attr);
+ return _sys_pthread_funcs.f_pthread_cond_init(cond, attr);
+}
+
+int pthread_cond_signal(pthread_cond_t *cond)
+{
+ if (override) {
+ lthread_cond_signal(*(struct lthread_cond **)cond);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_cond_signal(cond);
+}
+
+int
+pthread_cond_timedwait(pthread_cond_t *__restrict cond,
+ pthread_mutex_t *__restrict mutex,
+ const struct timespec *__restrict time)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_cond_timedwait(cond, mutex, time);
+}
+
+int pthread_cond_wait(pthread_cond_t *cond, pthread_mutex_t *mutex)
+{
+ if (override) {
+ pthread_mutex_unlock(mutex);
+ int rv = lthread_cond_wait(*(struct lthread_cond **)cond, 0);
+
+ pthread_mutex_lock(mutex);
+ return rv;
+ }
+ return _sys_pthread_funcs.f_pthread_cond_wait(cond, mutex);
+}
+
+int
+pthread_create(pthread_t *__restrict tid,
+ const pthread_attr_t *__restrict attr,
+ void *(func) (void *),
+ void *__restrict arg)
+{
+ if (override) {
+ int lcore = -1;
+
+ if (attr != NULL) {
+ /* determine CPU being requested */
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ pthread_attr_getaffinity_np(attr,
+ sizeof(cpu_set_t),
+ &cpuset);
+
+ if (CPU_COUNT(&cpuset) != 1)
+ return POSIX_ERRNO(EINVAL);
+
+ for (lcore = 0; lcore < LTHREAD_MAX_LCORES; lcore++) {
+ if (!CPU_ISSET(lcore, &cpuset))
+ continue;
+ break;
+ }
+ }
+ return lthread_create((struct lthread **)tid, lcore,
+ (void (*)(void *))func, arg);
+ }
+ return _sys_pthread_funcs.f_pthread_create(tid, attr, func, arg);
+}
+
+int pthread_detach(pthread_t tid)
+{
+ if (override) {
+ struct lthread *lt = (struct lthread *)tid;
+
+ if (lt == lthread_current())
+ lthread_detach();
+ return 0;
+ NOT_IMPLEMENTED;
+ }
+ return _sys_pthread_funcs.f_pthread_detach(tid);
+}
+
+int pthread_equal(pthread_t a, pthread_t b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_equal(a, b);
+}
+
+void pthread_exit_override(void *v)
+{
+ if (override) {
+ lthread_exit(v);
+ return;
+ }
+ _sys_pthread_funcs.f_pthread_exit(v);
+}
+
+void
+*pthread_getspecific(pthread_key_t key)
+{
+ if (override)
+ return lthread_getspecific((unsigned int) key);
+ return _sys_pthread_funcs.f_pthread_getspecific(key);
+}
+
+int pthread_getcpuclockid(pthread_t a, clockid_t *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_getcpuclockid(a, b);
+}
+
+int pthread_join(pthread_t tid, void **val)
+{
+ if (override)
+ return lthread_join((struct lthread *)tid, val);
+ return _sys_pthread_funcs.f_pthread_join(tid, val);
+}
+
+int pthread_key_create(pthread_key_t *keyptr, void (*dtor) (void *))
+{
+ if (override)
+ return lthread_key_create((unsigned int *)keyptr, dtor);
+ return _sys_pthread_funcs.f_pthread_key_create(keyptr, dtor);
+}
+
+int pthread_key_delete(pthread_key_t key)
+{
+ if (override) {
+ lthread_key_delete((unsigned int) key);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_key_delete(key);
+}
+
+
+int
+pthread_mutex_init(pthread_mutex_t *mutex, const pthread_mutexattr_t *attr)
+{
+ if (override)
+ return lthread_mutex_init(NULL,
+ (struct lthread_mutex **)mutex,
+ (const struct lthread_mutexattr *)attr);
+ return _sys_pthread_funcs.f_pthread_mutex_init(mutex, attr);
+}
+
+int pthread_mutex_lock(pthread_mutex_t *mutex)
+{
+ if (override)
+ return lthread_mutex_lock(*(struct lthread_mutex **)mutex);
+ return _sys_pthread_funcs.f_pthread_mutex_lock(mutex);
+}
+
+int pthread_mutex_trylock(pthread_mutex_t *mutex)
+{
+ if (override)
+ return lthread_mutex_trylock(*(struct lthread_mutex **)mutex);
+ return _sys_pthread_funcs.f_pthread_mutex_trylock(mutex);
+}
+
+int pthread_mutex_timedlock(pthread_mutex_t *mutex, const struct timespec *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_mutex_timedlock(mutex, b);
+}
+
+int pthread_mutex_unlock(pthread_mutex_t *mutex)
+{
+ if (override)
+ return lthread_mutex_unlock(*(struct lthread_mutex **)mutex);
+ return _sys_pthread_funcs.f_pthread_mutex_unlock(mutex);
+}
+
+int pthread_once(pthread_once_t *a, void (b) (void))
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_once(a, b);
+}
+
+int pthread_rwlock_destroy(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_destroy(a);
+}
+
+int pthread_rwlock_init(pthread_rwlock_t *a, const pthread_rwlockattr_t *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_init(a, b);
+}
+
+int pthread_rwlock_rdlock(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_rdlock(a);
+}
+
+int pthread_rwlock_timedrdlock(pthread_rwlock_t *a, const struct timespec *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_timedrdlock(a, b);
+}
+
+int pthread_rwlock_timedwrlock(pthread_rwlock_t *a, const struct timespec *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_timedwrlock(a, b);
+}
+
+int pthread_rwlock_tryrdlock(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_tryrdlock(a);
+}
+
+int pthread_rwlock_trywrlock(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_trywrlock(a);
+}
+
+int pthread_rwlock_unlock(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_unlock(a);
+}
+
+int pthread_rwlock_wrlock(pthread_rwlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_rwlock_wrlock(a);
+}
+
+int pthread_yield(void)
+{
+ if (override) {
+ lthread_yield();
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_yield();
+
+}
+
+pthread_t pthread_self(void)
+{
+ if (override)
+ return (pthread_t) lthread_current();
+ return _sys_pthread_funcs.f_pthread_self();
+}
+
+int pthread_setspecific(pthread_key_t key, const void *data)
+{
+ if (override) {
+ int rv = lthread_setspecific((unsigned int)key, data);
+ return rv;
+ }
+ return _sys_pthread_funcs.f_pthread_setspecific(key, data);
+}
+
+int pthread_spin_init(pthread_spinlock_t *a, int b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_spin_init(a, b);
+}
+
+int pthread_spin_destroy(pthread_spinlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_spin_destroy(a);
+}
+
+int pthread_spin_lock(pthread_spinlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_spin_lock(a);
+}
+
+int pthread_spin_trylock(pthread_spinlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_spin_trylock(a);
+}
+
+int pthread_spin_unlock(pthread_spinlock_t *a)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_spin_unlock(a);
+}
+
+int pthread_cancel(pthread_t tid)
+{
+ if (override) {
+ lthread_cancel(*(struct lthread **)tid);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_cancel(tid);
+}
+
+int pthread_setcancelstate(int a, int *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_setcancelstate(a, b);
+}
+
+int pthread_setcanceltype(int a, int *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_setcanceltype(a, b);
+}
+
+void pthread_testcancel(void)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_testcancel();
+}
+
+
+int pthread_getschedparam(pthread_t tid, int *a, struct sched_param *b)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_getschedparam(tid, a, b);
+}
+
+int pthread_setschedparam(pthread_t a, int b, const struct sched_param *c)
+{
+ NOT_IMPLEMENTED;
+ return _sys_pthread_funcs.f_pthread_setschedparam(a, b, c);
+}
+
+
+int nanosleep(const struct timespec *req, struct timespec *rem)
+{
+ if (override) {
+ uint64_t ns = req->tv_sec * 1000000000 + req->tv_nsec;
+
+ lthread_sleep(ns);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_nanosleep(req, rem);
+}
+
+int
+pthread_setaffinity_np(pthread_t thread, size_t cpusetsize,
+ const cpu_set_t *cpuset)
+{
+ if (override) {
+ /* we only allow affinity with a single CPU */
+ if (CPU_COUNT(cpuset) != 1)
+ return POSIX_ERRNO(EINVAL);
+
+		/* we only allow the current thread to set its own affinity */
+ struct lthread *lt = (struct lthread *)thread;
+
+ if (lthread_current() != lt)
+ return POSIX_ERRNO(EINVAL);
+
+ /* determine the CPU being requested */
+ int i;
+
+ for (i = 0; i < LTHREAD_MAX_LCORES; i++) {
+ if (!CPU_ISSET(i, cpuset))
+ continue;
+ break;
+ }
+ /* check requested core is allowed */
+ if (i == LTHREAD_MAX_LCORES)
+ return POSIX_ERRNO(EINVAL);
+
+ /* finally we can set affinity to the requested lcore */
+ lthread_set_affinity(i);
+ return 0;
+ }
+ return _sys_pthread_funcs.f_pthread_setaffinity_np(thread, cpusetsize,
+ cpuset);
+}
diff --git a/examples/performance-thread/pthread_shim/pthread_shim.h b/examples/performance-thread/pthread_shim/pthread_shim.h
new file mode 100644
index 00000000..78bbb5ac
--- /dev/null
+++ b/examples/performance-thread/pthread_shim/pthread_shim.h
@@ -0,0 +1,113 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _PTHREAD_SHIM_H_
+#define _PTHREAD_SHIM_H_
+#include <pthread.h>
+
+/*
+ * This pthread shim is an example that demonstrates how legacy code
+ * that makes use of POSIX pthread services can make use of lthreads
+ * with reduced porting effort.
+ *
+ * N.B. The example is not a complete implementation, only a subset of
+ * pthread APIs sufficient to demonstrate the principle of operation
+ * is implemented.
+ *
+ * In general pthread attribute objects do not have equivalent functions
+ * in lthreads, and are ignored.
+ *
+ * There is one exception and that is the use of attr to specify a
+ * core affinity in calls to pthread_create.
+ *
+ * The shim operates as follows:-
+ *
+ * On initialisation a constructor function uses dlsym to obtain and
+ * save the loaded address of the full set of pthread APIs that will
+ * be overridden.
+ *
+ * For each function there is a stub provided that will invoke either
+ * the genuine pthread library function saved by the constructor,
+ * or else the corresponding equivalent lthread function.
+ *
+ * The stub functions are implemented in pthread_shim.c
+ *
+ * The stub will take care of adapting parameters, and will police
+ * any constraints where lthread functionality differs.
+ *
+ * The initial thread must always be a pure lthread.
+ *
+ * The decision whether to invoke the real library function or the lthread
+ * function is controlled by a per pthread flag that can be switched
+ * on or off by the pthread_override_set() API described below. Typically
+ * this should be done as the first action of the initial lthread.
+ *
+ * N.B. In general it would be poor practice to revert to invoking a real
+ * pthread function when running as an lthread, since these may block and
+ * effectively stall the lthread scheduler.
+ *
+ */
+
+
+/*
+ * An exiting lthread must not terminate the pthread it is running in
+ * since this would mean terminating the lthread scheduler.
+ * We override pthread_exit() with a macro because it is typically declared with
+ * __attribute__((noreturn))
+ */
+void pthread_exit_override(void *v);
+
+#define pthread_exit(v) do { \
+ pthread_exit_override((v)); \
+ return NULL; \
+} while (0)
+
+/*
+ * Enable/Disable pthread override
+ * state
+ * 0 disable
+ * 1 enable
+ */
+void pthread_override_set(int state);
+
+
+/*
+ * Return pthread override state
+ * return
+ * 0 disable
+ * 1 enable
+ */
+int pthread_override_get(void);
+
+
+#endif /* _PTHREAD_SHIM_H_ */
diff --git a/examples/ptpclient/Makefile b/examples/ptpclient/Makefile
new file mode 100644
index 00000000..d241730f
--- /dev/null
+++ b/examples/ptpclient/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = ptpclient
+
+# all source are stored in SRCS-y
+SRCS-y := ptpclient.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+LDLIBS += -lrt
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/ptpclient/ptpclient.c b/examples/ptpclient/ptpclient.c
new file mode 100644
index 00000000..0af4f3b6
--- /dev/null
+++ b/examples/ptpclient/ptpclient.c
@@ -0,0 +1,780 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * This application is a simple Layer 2 PTP v2 client. It shows delta values
+ * which are used to synchronize the PHC clock. If the "-T 1" parameter is
+ * passed to the application, the Linux kernel clock is also synchronized.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+#include <rte_ip.h>
+#include <limits.h>
+#include <sys/time.h>
+#include <getopt.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+
+/* Values for the PTP messageType field. */
+#define SYNC 0x0
+#define DELAY_REQ 0x1
+#define PDELAY_REQ 0x2
+#define PDELAY_RESP 0x3
+#define FOLLOW_UP 0x8
+#define DELAY_RESP 0x9
+#define PDELAY_RESP_FOLLOW_UP 0xA
+#define ANNOUNCE 0xB
+#define SIGNALING 0xC
+#define MANAGEMENT 0xD
+
+#define NSEC_PER_SEC 1000000000L
+#define KERNEL_TIME_ADJUST_LIMIT 20000
+#define PTP_PROTOCOL 0x88F7
+
+struct rte_mempool *mbuf_pool;
+uint32_t ptp_enabled_port_mask;
+uint8_t ptp_enabled_port_nb;
+static uint8_t ptp_enabled_ports[RTE_MAX_ETHPORTS];
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN }
+};
+
+static const struct ether_addr ether_multicast = {
+ .addr_bytes = {0x01, 0x1b, 0x19, 0x0, 0x0, 0x0}
+};
+
+/* Structs used for PTP handling. */
+struct tstamp {
+ uint16_t sec_msb;
+ uint32_t sec_lsb;
+ uint32_t ns;
+} __attribute__((packed));
+
+struct clock_id {
+ uint8_t id[8];
+};
+
+struct port_id {
+ struct clock_id clock_id;
+ uint16_t port_number;
+} __attribute__((packed));
+
+struct ptp_header {
+ uint8_t msg_type;
+ uint8_t ver;
+ uint16_t message_length;
+ uint8_t domain_number;
+ uint8_t reserved1;
+ uint8_t flag_field[2];
+ int64_t correction;
+ uint32_t reserved2;
+ struct port_id source_port_id;
+ uint16_t seq_id;
+ uint8_t control;
+ int8_t log_message_interval;
+} __attribute__((packed));
+
+struct sync_msg {
+ struct ptp_header hdr;
+ struct tstamp origin_tstamp;
+} __attribute__((packed));
+
+struct follow_up_msg {
+ struct ptp_header hdr;
+ struct tstamp precise_origin_tstamp;
+ uint8_t suffix[0];
+} __attribute__((packed));
+
+struct delay_req_msg {
+ struct ptp_header hdr;
+ struct tstamp origin_tstamp;
+} __attribute__((packed));
+
+struct delay_resp_msg {
+ struct ptp_header hdr;
+ struct tstamp rx_tstamp;
+ struct port_id req_port_id;
+ uint8_t suffix[0];
+} __attribute__((packed));
+
+struct ptp_message {
+ union {
+ struct ptp_header header;
+ struct sync_msg sync;
+ struct delay_req_msg delay_req;
+ struct follow_up_msg follow_up;
+ struct delay_resp_msg delay_resp;
+ } __attribute__((packed));
+};
+
+struct ptpv2_data_slave_ordinary {
+ struct rte_mbuf *m;
+ struct timespec tstamp1;
+ struct timespec tstamp2;
+ struct timespec tstamp3;
+ struct timespec tstamp4;
+ struct clock_id client_clock_id;
+ struct clock_id master_clock_id;
+ struct timeval new_adj;
+ int64_t delta;
+ uint8_t portid;
+ uint16_t seqID_SYNC;
+ uint16_t seqID_FOLLOWUP;
+ uint8_t ptpset;
+ uint8_t kernel_time_set;
+ uint8_t current_ptp_port;
+};
+
+static struct ptpv2_data_slave_ordinary ptp_data;
+
+static inline uint64_t timespec64_to_ns(const struct timespec *ts)
+{
+ return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec;
+}
+
+static struct timeval
+ns_to_timeval(int64_t nsec)
+{
+ struct timespec t_spec = {0, 0};
+ struct timeval t_eval = {0, 0};
+ int32_t rem;
+
+ if (nsec == 0)
+ return t_eval;
+ rem = nsec % NSEC_PER_SEC;
+ t_spec.tv_sec = nsec / NSEC_PER_SEC;
+
+ if (rem < 0) {
+ t_spec.tv_sec--;
+ rem += NSEC_PER_SEC;
+ }
+
+ t_spec.tv_nsec = rem;
+ t_eval.tv_sec = t_spec.tv_sec;
+ t_eval.tv_usec = t_spec.tv_nsec / 1000;
+
+ return t_eval;
+}
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1;
+ const uint16_t tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ /* Configure the Ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Allocate and set up 1 RX queue per Ethernet port. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Allocate and set up 1 TX queue per Ethernet port. */
+ for (q = 0; q < tx_rings; q++) {
+ /* Setup txq_flags */
+ struct rte_eth_txconf *txconf;
+
+		rte_eth_dev_info_get(port, &dev_info);
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), txconf);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the Ethernet port. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Enable timesync timestamping for the Ethernet device */
+ rte_eth_timesync_enable(port);
+
+ /* Enable RX in promiscuous mode for the Ethernet device. */
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
+
+static void
+print_clock_info(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+ int64_t nsec;
+ struct timespec net_time, sys_time;
+
+ printf("Master Clock id: %02x:%02x:%02x:%02x:%02x:%02x:%02x:%02x",
+ ptp_data->master_clock_id.id[0],
+ ptp_data->master_clock_id.id[1],
+ ptp_data->master_clock_id.id[2],
+ ptp_data->master_clock_id.id[3],
+ ptp_data->master_clock_id.id[4],
+ ptp_data->master_clock_id.id[5],
+ ptp_data->master_clock_id.id[6],
+ ptp_data->master_clock_id.id[7]);
+
+ printf("\nT2 - Slave Clock. %lds %ldns",
+ (ptp_data->tstamp2.tv_sec),
+ (ptp_data->tstamp2.tv_nsec));
+
+ printf("\nT1 - Master Clock. %lds %ldns ",
+ ptp_data->tstamp1.tv_sec,
+ (ptp_data->tstamp1.tv_nsec));
+
+ printf("\nT3 - Slave Clock. %lds %ldns",
+ ptp_data->tstamp3.tv_sec,
+ (ptp_data->tstamp3.tv_nsec));
+
+ printf("\nT4 - Master Clock. %lds %ldns ",
+ ptp_data->tstamp4.tv_sec,
+ (ptp_data->tstamp4.tv_nsec));
+
+ printf("\nDelta between master and slave clocks:%"PRId64"ns\n",
+ ptp_data->delta);
+
+ clock_gettime(CLOCK_REALTIME, &sys_time);
+ rte_eth_timesync_read_time(ptp_data->current_ptp_port, &net_time);
+
+ time_t ts = net_time.tv_sec;
+
+ printf("\n\nComparison between Linux kernel Time and PTP:");
+
+ printf("\nCurrent PTP Time: %.24s %.9ld ns",
+ ctime(&ts), net_time.tv_nsec);
+
+ nsec = (int64_t)timespec64_to_ns(&net_time) -
+ (int64_t)timespec64_to_ns(&sys_time);
+ ptp_data->new_adj = ns_to_timeval(nsec);
+
+ gettimeofday(&ptp_data->new_adj, NULL);
+
+ time_t tp = ptp_data->new_adj.tv_sec;
+
+ printf("\nCurrent SYS Time: %.24s %.6ld ns",
+ ctime(&tp), ptp_data->new_adj.tv_usec);
+
+ printf("\nDelta between PTP and Linux Kernel time:%"PRId64"ns\n",
+ nsec);
+
+ printf("[Ctrl+C to quit]\n");
+
+ /* Clear screen and put cursor in column 1, row 1 */
+ printf("\033[2J\033[1;1H");
+}
+
+static int64_t
+delta_eval(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+ int64_t delta;
+ uint64_t t1 = 0;
+ uint64_t t2 = 0;
+ uint64_t t3 = 0;
+ uint64_t t4 = 0;
+
+ t1 = timespec64_to_ns(&ptp_data->tstamp1);
+ t2 = timespec64_to_ns(&ptp_data->tstamp2);
+ t3 = timespec64_to_ns(&ptp_data->tstamp3);
+ t4 = timespec64_to_ns(&ptp_data->tstamp4);
+
+ delta = -((int64_t)((t2 - t1) - (t4 - t3))) / 2;
+
+ return delta;
+}
+
+/*
+ * Parse the PTP SYNC message.
+ */
+static void
+parse_sync(struct ptpv2_data_slave_ordinary *ptp_data, uint16_t rx_tstamp_idx)
+{
+ struct ptp_header *ptp_hdr;
+
+ ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(ptp_data->m, char *)
+ + sizeof(struct ether_hdr));
+ ptp_data->seqID_SYNC = rte_be_to_cpu_16(ptp_hdr->seq_id);
+
+ if (ptp_data->ptpset == 0) {
+ rte_memcpy(&ptp_data->master_clock_id,
+ &ptp_hdr->source_port_id.clock_id,
+ sizeof(struct clock_id));
+ ptp_data->ptpset = 1;
+ }
+
+	if (memcmp(&ptp_data->master_clock_id,
+ &ptp_hdr->source_port_id.clock_id,
+ sizeof(struct clock_id)) == 0) {
+
+ if (ptp_data->ptpset == 1)
+ rte_eth_timesync_read_rx_timestamp(ptp_data->portid,
+ &ptp_data->tstamp2, rx_tstamp_idx);
+ }
+
+}
+
+/*
+ * Parse the PTP FOLLOWUP message and send DELAY_REQ to the master clock.
+ */
+static void
+parse_fup(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+ struct ether_hdr *eth_hdr;
+ struct ptp_header *ptp_hdr;
+ struct clock_id *client_clkid;
+ struct ptp_message *ptp_msg;
+ struct rte_mbuf *created_pkt;
+ struct tstamp *origin_tstamp;
+ struct ether_addr eth_multicast = ether_multicast;
+ size_t pkt_size;
+ int wait_us;
+ struct rte_mbuf *m = ptp_data->m;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(m, char *)
+ + sizeof(struct ether_hdr));
+ if (memcmp(&ptp_data->master_clock_id,
+ &ptp_hdr->source_port_id.clock_id,
+ sizeof(struct clock_id)) != 0)
+ return;
+
+ ptp_data->seqID_FOLLOWUP = rte_be_to_cpu_16(ptp_hdr->seq_id);
+ ptp_msg = (struct ptp_message *) (rte_pktmbuf_mtod(m, char *) +
+ sizeof(struct ether_hdr));
+
+ origin_tstamp = &ptp_msg->follow_up.precise_origin_tstamp;
+ ptp_data->tstamp1.tv_nsec = ntohl(origin_tstamp->ns);
+ ptp_data->tstamp1.tv_sec =
+ ((uint64_t)ntohl(origin_tstamp->sec_lsb)) |
+ (((uint64_t)ntohs(origin_tstamp->sec_msb)) << 32);
+
+ if (ptp_data->seqID_FOLLOWUP == ptp_data->seqID_SYNC) {
+
+ created_pkt = rte_pktmbuf_alloc(mbuf_pool);
+ pkt_size = sizeof(struct ether_hdr) +
+ sizeof(struct ptp_message);
+ created_pkt->data_len = pkt_size;
+ created_pkt->pkt_len = pkt_size;
+ eth_hdr = rte_pktmbuf_mtod(created_pkt, struct ether_hdr *);
+ rte_eth_macaddr_get(ptp_data->portid, &eth_hdr->s_addr);
+
+ /* Set multicast address 01-1B-19-00-00-00. */
+ ether_addr_copy(&eth_multicast, &eth_hdr->d_addr);
+
+ eth_hdr->ether_type = htons(PTP_PROTOCOL);
+ ptp_msg = (struct ptp_message *)
+ (rte_pktmbuf_mtod(created_pkt, char *) +
+ sizeof(struct ether_hdr));
+
+ ptp_msg->delay_req.hdr.seq_id = htons(ptp_data->seqID_SYNC);
+ ptp_msg->delay_req.hdr.msg_type = DELAY_REQ;
+ ptp_msg->delay_req.hdr.ver = 2;
+ ptp_msg->delay_req.hdr.control = 1;
+ ptp_msg->delay_req.hdr.log_message_interval = 127;
+
+ /* Set up clock id. */
+ client_clkid =
+ &ptp_msg->delay_req.hdr.source_port_id.clock_id;
+
+ client_clkid->id[0] = eth_hdr->s_addr.addr_bytes[0];
+ client_clkid->id[1] = eth_hdr->s_addr.addr_bytes[1];
+ client_clkid->id[2] = eth_hdr->s_addr.addr_bytes[2];
+ client_clkid->id[3] = 0xFF;
+ client_clkid->id[4] = 0xFE;
+ client_clkid->id[5] = eth_hdr->s_addr.addr_bytes[3];
+ client_clkid->id[6] = eth_hdr->s_addr.addr_bytes[4];
+ client_clkid->id[7] = eth_hdr->s_addr.addr_bytes[5];
+
+ rte_memcpy(&ptp_data->client_clock_id,
+ client_clkid,
+ sizeof(struct clock_id));
+
+ /* Enable flag for hardware timestamping. */
+ created_pkt->ol_flags |= PKT_TX_IEEE1588_TMST;
+
+		/* Read value from NIC to prevent latching with old value. */
+ rte_eth_timesync_read_tx_timestamp(ptp_data->portid,
+ &ptp_data->tstamp3);
+
+ /* Transmit the packet. */
+ rte_eth_tx_burst(ptp_data->portid, 0, &created_pkt, 1);
+
+ wait_us = 0;
+ ptp_data->tstamp3.tv_nsec = 0;
+ ptp_data->tstamp3.tv_sec = 0;
+
+ /* Wait at least 1 us to read TX timestamp. */
+ while ((rte_eth_timesync_read_tx_timestamp(ptp_data->portid,
+ &ptp_data->tstamp3) < 0) && (wait_us < 1000)) {
+ rte_delay_us(1);
+ wait_us++;
+ }
+ }
+}
+
+/*
+ * Update the kernel time with the difference between it and the current NIC
+ * time.
+ */
+static inline void
+update_kernel_time(void)
+{
+ int64_t nsec;
+ struct timespec net_time, sys_time;
+
+ clock_gettime(CLOCK_REALTIME, &sys_time);
+ rte_eth_timesync_read_time(ptp_data.current_ptp_port, &net_time);
+
+ nsec = (int64_t)timespec64_to_ns(&net_time) -
+ (int64_t)timespec64_to_ns(&sys_time);
+
+ ptp_data.new_adj = ns_to_timeval(nsec);
+
+ /*
+ * If difference between kernel time and system time in NIC is too big
+ * (more than +/- 20 microseconds), use clock_settime to set directly
+ * the kernel time, as adjtime is better for small adjustments (takes
+ * longer to adjust the time).
+ */
+
+ if (nsec > KERNEL_TIME_ADJUST_LIMIT || nsec < -KERNEL_TIME_ADJUST_LIMIT)
+ clock_settime(CLOCK_REALTIME, &net_time);
+ else
+ adjtime(&ptp_data.new_adj, 0);
+
+
+}
+
+/*
+ * Parse the DELAY_RESP message.
+ */
+static void
+parse_drsp(struct ptpv2_data_slave_ordinary *ptp_data)
+{
+ struct rte_mbuf *m = ptp_data->m;
+ struct ptp_message *ptp_msg;
+ struct tstamp *rx_tstamp;
+ uint16_t seq_id;
+
+ ptp_msg = (struct ptp_message *) (rte_pktmbuf_mtod(m, char *) +
+ sizeof(struct ether_hdr));
+ seq_id = rte_be_to_cpu_16(ptp_msg->delay_resp.hdr.seq_id);
+ if (memcmp(&ptp_data->client_clock_id,
+ &ptp_msg->delay_resp.req_port_id.clock_id,
+ sizeof(struct clock_id)) == 0) {
+ if (seq_id == ptp_data->seqID_FOLLOWUP) {
+ rx_tstamp = &ptp_msg->delay_resp.rx_tstamp;
+ ptp_data->tstamp4.tv_nsec = ntohl(rx_tstamp->ns);
+ ptp_data->tstamp4.tv_sec =
+ ((uint64_t)ntohl(rx_tstamp->sec_lsb)) |
+ (((uint64_t)ntohs(rx_tstamp->sec_msb)) << 32);
+
+ /* Evaluate the delta for adjustment. */
+ ptp_data->delta = delta_eval(ptp_data);
+
+ rte_eth_timesync_adjust_time(ptp_data->portid,
+ ptp_data->delta);
+
+ ptp_data->current_ptp_port = ptp_data->portid;
+
+ /* Update kernel time if enabled in app parameters. */
+ if (ptp_data->kernel_time_set == 1)
+ update_kernel_time();
+
+
+
+ }
+ }
+}
+
+/* This function processes PTP packets, implementing slave PTP IEEE1588 L2
+ * functionality.
+ */
+static void
+parse_ptp_frames(uint8_t portid, struct rte_mbuf *m) {
+ struct ptp_header *ptp_hdr;
+ struct ether_hdr *eth_hdr;
+ uint16_t eth_type;
+
+ eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ eth_type = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+ if (eth_type == PTP_PROTOCOL) {
+ ptp_data.m = m;
+ ptp_data.portid = portid;
+ ptp_hdr = (struct ptp_header *)(rte_pktmbuf_mtod(m, char *)
+ + sizeof(struct ether_hdr));
+
+ switch (ptp_hdr->msg_type) {
+ case SYNC:
+ parse_sync(&ptp_data, m->timesync);
+ break;
+ case FOLLOW_UP:
+ parse_fup(&ptp_data);
+ break;
+ case DELAY_RESP:
+ parse_drsp(&ptp_data);
+ print_clock_info(&ptp_data);
+ break;
+ default:
+ break;
+ }
+ }
+}
+
+/*
+ * The lcore main. This is the main thread that does the work, reading from an
+ * input port and writing to an output port.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+	uint8_t portid;
+	unsigned i, nb_rx;
+ struct rte_mbuf *m;
+
+ /*
+ * Check that the port is on the same NUMA node as the polling thread
+ * for best performance.
+ */
+ printf("\nCore %u Waiting for SYNC packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+
+ /* Run until the application is quit or killed. */
+
+ while (1) {
+ /* Read packet from RX queues. */
+		for (i = 0; i < ptp_enabled_port_nb; i++) {
+
+			portid = ptp_enabled_ports[i];
+ nb_rx = rte_eth_rx_burst(portid, 0, &m, 1);
+
+ if (likely(nb_rx == 0))
+ continue;
+
+ if (m->ol_flags & PKT_RX_IEEE1588_PTP)
+ parse_ptp_frames(portid, m);
+
+ rte_pktmbuf_free(m);
+ }
+ }
+}
+
+static void
+print_usage(const char *prgname)
+{
+ printf("%s [EAL options] -- -p PORTMASK -T VALUE\n"
+ " -T VALUE: 0 - Disable, 1 - Enable Linux Clock"
+ " Synchronization (0 default)\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+ prgname);
+}
+
+static int
+ptp_parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* Parse the hexadecimal string. */
+ pm = strtoul(portmask, &end, 16);
+
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+static int
+parse_ptp_kernel(const char *param)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* Parse the hexadecimal string. */
+ pm = strtoul(param, &end, 16);
+
+ if ((param[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (pm == 0)
+ return 0;
+
+ return 1;
+}
+
+/* Parse the commandline arguments. */
+static int
+ptp_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = { {NULL, 0, 0, 0} };
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:T:",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+
+ /* Portmask. */
+ case 'p':
+ ptp_enabled_port_mask = ptp_parse_portmask(optarg);
+ if (ptp_enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+ /* Time synchronization. */
+ case 'T':
+ ret = parse_ptp_kernel(optarg);
+ if (ret < 0) {
+ print_usage(prgname);
+ return -1;
+ }
+
+ ptp_data.kernel_time_set = ret;
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ argv[optind-1] = prgname;
+
+ optind = 0; /* Reset getopt lib. */
+
+ return 0;
+}
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ */
+int
+main(int argc, char *argv[])
+{
+ unsigned nb_ports;
+
+ uint8_t portid;
+
+ /* Initialize the Environment Abstraction Layer (EAL). */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+ memset(&ptp_data, '\0', sizeof(struct ptpv2_data_slave_ordinary));
+
+ argc -= ret;
+ argv += ret;
+
+ ret = ptp_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with PTP initialization\n");
+
+	/* Get the number of available Ethernet ports. */
+ nb_ports = rte_eth_dev_count();
+
+ /* Creates a new mempool in memory to hold the mbufs. */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* Initialize all ports. */
+ for (portid = 0; portid < nb_ports; portid++) {
+ if ((ptp_enabled_port_mask & (1 << portid)) != 0) {
+ if (port_init(portid, mbuf_pool) == 0) {
+ ptp_enabled_ports[ptp_enabled_port_nb] = portid;
+ ptp_enabled_port_nb++;
+ } else {
+ rte_exit(EXIT_FAILURE,
+ "Cannot init port %"PRIu8 "\n",
+ portid);
+ }
+ } else
+ printf("Skipping disabled port %u\n", portid);
+ }
+
+ if (ptp_enabled_port_nb == 0) {
+ rte_exit(EXIT_FAILURE,
+ "All available ports are disabled."
+ " Please set portmask.\n");
+ }
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+ /* Call lcore_main on the master core only. */
+ lcore_main();
+
+ return 0;
+}
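
To make the offset computed by delta_eval() and applied in parse_drsp() concrete, here is a small standalone example with made-up timestamps (path delay 50 ns, slave clock 10 ns ahead; the values are illustrative only, not taken from the application):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t t1 = 1000; /* SYNC sent (master clock) */
            uint64_t t2 = 1060; /* SYNC received (slave clock) */
            uint64_t t3 = 1200; /* DELAY_REQ sent (slave clock) */
            uint64_t t4 = 1240; /* DELAY_REQ received (master clock) */

            /* Same formula as delta_eval(): minus the master/slave offset. */
            int64_t delta = -((int64_t)((t2 - t1) - (t4 - t3))) / 2;

            /* Prints -10: the slave clock must be stepped back by 10 ns. */
            printf("delta = %lld ns\n", (long long)delta);
            return 0;
    }
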
diff --git a/examples/qos_meter/Makefile b/examples/qos_meter/Makefile
new file mode 100644
index 00000000..5113a129
--- /dev/null
+++ b/examples/qos_meter/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = qos_meter
+
+# all source are stored in SRCS-y
+SRCS-y := main.c rte_policer.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/qos_meter/main.c b/examples/qos_meter/main.c
new file mode 100644
index 00000000..b968b001
--- /dev/null
+++ b/examples/qos_meter/main.c
@@ -0,0 +1,394 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2016 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_mempool.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_mbuf.h>
+#include <rte_meter.h>
+
+/*
+ * Traffic metering configuration
+ *
+ */
+#define APP_MODE_FWD 0
+#define APP_MODE_SRTCM_COLOR_BLIND 1
+#define APP_MODE_SRTCM_COLOR_AWARE 2
+#define APP_MODE_TRTCM_COLOR_BLIND 3
+#define APP_MODE_TRTCM_COLOR_AWARE 4
+
+#define APP_MODE APP_MODE_SRTCM_COLOR_BLIND
+
+
+#include "main.h"
+
+
+#define APP_PKT_FLOW_POS 33
+#define APP_PKT_COLOR_POS 5
+
+
+#if APP_PKT_FLOW_POS > 64 || APP_PKT_COLOR_POS > 64
+#error Byte offset must not exceed 64
+#endif
+
+/*
+ * Buffer pool configuration
+ *
+ ***/
+#define NB_MBUF 8192
+#define MEMPOOL_CACHE_SIZE 256
+
+static struct rte_mempool *pool = NULL;
+
+/*
+ * NIC configuration
+ *
+ ***/
+static struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_RSS,
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0,
+ .hw_ip_checksum = 1,
+ .hw_vlan_filter = 0,
+ .jumbo_frame = 0,
+ .hw_strip_crc = 0,
+ },
+ .rx_adv_conf = {
+ .rss_conf = {
+ .rss_key = NULL,
+ .rss_hf = ETH_RSS_IP,
+ },
+ },
+ .txmode = {
+ .mq_mode = ETH_DCB_NONE,
+ },
+};
+
+#define NIC_RX_QUEUE_DESC 128
+#define NIC_TX_QUEUE_DESC 512
+
+#define NIC_RX_QUEUE 0
+#define NIC_TX_QUEUE 0
+
+/*
+ * Packet RX/TX
+ *
+ ***/
+#define PKT_RX_BURST_MAX 32
+#define PKT_TX_BURST_MAX 32
+#define TIME_TX_DRAIN 200000ULL
+
+static uint8_t port_rx;
+static uint8_t port_tx;
+static struct rte_mbuf *pkts_rx[PKT_RX_BURST_MAX];
+struct rte_eth_dev_tx_buffer *tx_buffer;
+
+struct rte_meter_srtcm_params app_srtcm_params[] = {
+ {.cir = 1000000 * 46, .cbs = 2048, .ebs = 2048},
+};
+
+struct rte_meter_trtcm_params app_trtcm_params[] = {
+ {.cir = 1000000 * 46, .pir = 1500000 * 46, .cbs = 2048, .pbs = 2048},
+};
+
+#define APP_FLOWS_MAX 256
+
+FLOW_METER app_flows[APP_FLOWS_MAX];
+
+static void
+app_configure_flow_table(void)
+{
+ uint32_t i, j;
+
+ for (i = 0, j = 0; i < APP_FLOWS_MAX; i ++, j = (j + 1) % RTE_DIM(PARAMS)){
+ FUNC_CONFIG(&app_flows[i], &PARAMS[j]);
+ }
+}
+
+static inline void
+app_set_pkt_color(uint8_t *pkt_data, enum policer_action color)
+{
+ pkt_data[APP_PKT_COLOR_POS] = (uint8_t)color;
+}
+
+static inline int
+app_pkt_handle(struct rte_mbuf *pkt, uint64_t time)
+{
+ uint8_t input_color, output_color;
+ uint8_t *pkt_data = rte_pktmbuf_mtod(pkt, uint8_t *);
+ uint32_t pkt_len = rte_pktmbuf_pkt_len(pkt) - sizeof(struct ether_hdr);
+ uint8_t flow_id = (uint8_t)(pkt_data[APP_PKT_FLOW_POS] & (APP_FLOWS_MAX - 1));
+ input_color = pkt_data[APP_PKT_COLOR_POS];
+ enum policer_action action;
+
+ /* color input is not used for blind modes */
+ output_color = (uint8_t) FUNC_METER(&app_flows[flow_id], time, pkt_len,
+ (enum rte_meter_color) input_color);
+
+ /* Apply policing and set the output color */
+ action = policer_table[input_color][output_color];
+ app_set_pkt_color(pkt_data, action);
+
+ return action;
+}
+
+
+static __attribute__((noreturn)) int
+main_loop(__attribute__((unused)) void *dummy)
+{
+ uint64_t current_time, last_time = rte_rdtsc();
+ uint32_t lcore_id = rte_lcore_id();
+
+ printf("Core %u: port RX = %d, port TX = %d\n", lcore_id, port_rx, port_tx);
+
+ while (1) {
+ uint64_t time_diff;
+ int i, nb_rx;
+
+ /* Mechanism to avoid stale packets in the output buffer */
+ current_time = rte_rdtsc();
+ time_diff = current_time - last_time;
+ if (unlikely(time_diff > TIME_TX_DRAIN)) {
+ /* Flush tx buffer */
+ rte_eth_tx_buffer_flush(port_tx, NIC_TX_QUEUE, tx_buffer);
+ last_time = current_time;
+ }
+
+ /* Read packet burst from NIC RX */
+ nb_rx = rte_eth_rx_burst(port_rx, NIC_RX_QUEUE, pkts_rx, PKT_RX_BURST_MAX);
+
+ /* Handle packets */
+ for (i = 0; i < nb_rx; i ++) {
+ struct rte_mbuf *pkt = pkts_rx[i];
+
+ /* Handle current packet */
+ if (app_pkt_handle(pkt, current_time) == DROP)
+ rte_pktmbuf_free(pkt);
+ else
+ rte_eth_tx_buffer(port_tx, NIC_TX_QUEUE, tx_buffer, pkt);
+ }
+ }
+}
+
+static void
+print_usage(const char *prgname)
+{
+ printf ("%s [EAL options] -- -p PORTMASK\n"
+ " -p PORTMASK: hexadecimal bitmask of ports to configure\n",
+ prgname);
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+/* Parse the argument given in the command line of the application */
+static int
+parse_args(int argc, char **argv)
+{
+ int opt;
+ char **argvopt;
+ int option_index;
+ char *prgname = argv[0];
+ static struct option lgopts[] = {
+ {NULL, 0, 0, 0}
+ };
+ uint64_t port_mask, i, mask;
+
+ argvopt = argv;
+
+ while ((opt = getopt_long(argc, argvopt, "p:", lgopts, &option_index)) != EOF) {
+ switch (opt) {
+ case 'p':
+ port_mask = parse_portmask(optarg);
+ if (port_mask == 0) {
+ printf("invalid port mask (null port mask)\n");
+ print_usage(prgname);
+ return -1;
+ }
+
+ for (i = 0, mask = 1; i < 64; i ++, mask <<= 1){
+ if (mask & port_mask){
+ port_rx = i;
+ port_mask &= ~ mask;
+ break;
+ }
+ }
+
+ for (i = 0, mask = 1; i < 64; i ++, mask <<= 1){
+ if (mask & port_mask){
+ port_tx = i;
+ port_mask &= ~ mask;
+ break;
+ }
+ }
+
+ if (port_mask != 0) {
+ printf("invalid port mask (more than 2 ports)\n");
+ print_usage(prgname);
+ return -1;
+ }
+ break;
+
+ default:
+ print_usage(prgname);
+ return -1;
+ }
+ }
+
+ if (optind <= 1) {
+ print_usage(prgname);
+ return -1;
+ }
+
+ argv[optind-1] = prgname;
+
+ optind = 0; /* reset getopt lib */
+ return 0;
+}
+
+int
+main(int argc, char **argv)
+{
+ uint32_t lcore_id;
+ int ret;
+
+ /* EAL init */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid EAL parameters\n");
+ argc -= ret;
+ argv += ret;
+ if (rte_lcore_count() != 1) {
+ rte_exit(EXIT_FAILURE, "This application does not accept more than one core. "
+ "Please adjust the \"-c COREMASK\" parameter accordingly.\n");
+ }
+
+ /* Application non-EAL arguments parse */
+ ret = parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid input arguments\n");
+
+ /* Buffer pool init */
+ pool = rte_pktmbuf_pool_create("pool", NB_MBUF, MEMPOOL_CACHE_SIZE,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (pool == NULL)
+ rte_exit(EXIT_FAILURE, "Buffer pool creation error\n");
+
+ /* NIC init */
+ ret = rte_eth_dev_configure(port_rx, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_rx, ret);
+
+ ret = rte_eth_rx_queue_setup(port_rx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC,
+ rte_eth_dev_socket_id(port_rx),
+ NULL, pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_rx, ret);
+
+ ret = rte_eth_tx_queue_setup(port_rx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC,
+ rte_eth_dev_socket_id(port_rx),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d TX queue setup error (%d)\n", port_rx, ret);
+
+ ret = rte_eth_dev_configure(port_tx, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d configuration error (%d)\n", port_tx, ret);
+
+ ret = rte_eth_rx_queue_setup(port_tx, NIC_RX_QUEUE, NIC_RX_QUEUE_DESC,
+ rte_eth_dev_socket_id(port_tx),
+ NULL, pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d RX queue setup error (%d)\n", port_tx, ret);
+
+ ret = rte_eth_tx_queue_setup(port_tx, NIC_TX_QUEUE, NIC_TX_QUEUE_DESC,
+ rte_eth_dev_socket_id(port_tx),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d TX queue setup error (%d)\n", port_tx, ret);
+
+ tx_buffer = rte_zmalloc_socket("tx_buffer",
+ RTE_ETH_TX_BUFFER_SIZE(PKT_TX_BURST_MAX), 0,
+ rte_eth_dev_socket_id(port_tx));
+ if (tx_buffer == NULL)
+ rte_exit(EXIT_FAILURE, "Port %d TX buffer allocation error\n",
+ port_tx);
+
+ rte_eth_tx_buffer_init(tx_buffer, PKT_TX_BURST_MAX);
+
+ ret = rte_eth_dev_start(port_rx);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d start error (%d)\n", port_rx, ret);
+
+ ret = rte_eth_dev_start(port_tx);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Port %d start error (%d)\n", port_tx, ret);
+
+ rte_eth_promiscuous_enable(port_rx);
+
+ rte_eth_promiscuous_enable(port_tx);
+
+ /* App configuration */
+ app_configure_flow_table();
+
+ /* Launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(main_loop, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/qos_meter/main.h b/examples/qos_meter/main.h
new file mode 100644
index 00000000..530bf69c
--- /dev/null
+++ b/examples/qos_meter/main.h
@@ -0,0 +1,93 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+enum policer_action {
+ GREEN = e_RTE_METER_GREEN,
+ YELLOW = e_RTE_METER_YELLOW,
+ RED = e_RTE_METER_RED,
+ DROP = 3,
+};
+
+enum policer_action policer_table[e_RTE_METER_COLORS][e_RTE_METER_COLORS] =
+{
+ { GREEN, RED, RED},
+ { DROP, YELLOW, RED},
+ { DROP, DROP, RED}
+};
+
+#if APP_MODE == APP_MODE_FWD
+
+#define FUNC_METER(a,b,c,d) color, flow_id=flow_id, pkt_len=pkt_len, time=time
+#define FUNC_CONFIG(a,b)
+#define PARAMS app_srtcm_params
+#define FLOW_METER int
+
+#elif APP_MODE == APP_MODE_SRTCM_COLOR_BLIND
+
+#define FUNC_METER(a,b,c,d) rte_meter_srtcm_color_blind_check(a,b,c)
+#define FUNC_CONFIG rte_meter_srtcm_config
+#define PARAMS app_srtcm_params
+#define FLOW_METER struct rte_meter_srtcm
+
+#elif (APP_MODE == APP_MODE_SRTCM_COLOR_AWARE)
+
+#define FUNC_METER rte_meter_srtcm_color_aware_check
+#define FUNC_CONFIG rte_meter_srtcm_config
+#define PARAMS app_srtcm_params
+#define FLOW_METER struct rte_meter_srtcm
+
+#elif (APP_MODE == APP_MODE_TRTCM_COLOR_BLIND)
+
+#define FUNC_METER(a,b,c,d) rte_meter_trtcm_color_blind_check(a,b,c)
+#define FUNC_CONFIG rte_meter_trtcm_config
+#define PARAMS app_trtcm_params
+#define FLOW_METER struct rte_meter_trtcm
+
+#elif (APP_MODE == APP_MODE_TRTCM_COLOR_AWARE)
+
+#define FUNC_METER rte_meter_trtcm_color_aware_check
+#define FUNC_CONFIG rte_meter_trtcm_config
+#define PARAMS app_trtcm_params
+#define FLOW_METER struct rte_meter_trtcm
+
+#else
+#error Invalid value for APP_MODE
+#endif
+
+
+
+
+#endif /* _MAIN_H_ */
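
For the default APP_MODE_SRTCM_COLOR_BLIND build, the macro selection above reduces the flow configuration and metering calls in main.c to roughly the following sketch (flow_setup() and meter_one_packet() are illustrative names, not part of the example itself):

    #include <stdint.h>
    #include <rte_meter.h>
    #include <rte_cycles.h>

    /* Parameter table defined in main.c. */
    extern struct rte_meter_srtcm_params app_srtcm_params[];

    static struct rte_meter_srtcm flow;             /* FLOW_METER */

    static void
    flow_setup(void)
    {
            /* FUNC_CONFIG(&flow, &PARAMS[0]) */
            rte_meter_srtcm_config(&flow, &app_srtcm_params[0]);
    }

    static enum rte_meter_color
    meter_one_packet(uint32_t pkt_len)
    {
            /* FUNC_METER(&flow, time, pkt_len, color): the colour argument
             * is dropped by the macro in colour-blind mode. */
            return rte_meter_srtcm_color_blind_check(&flow, rte_rdtsc(), pkt_len);
    }
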
diff --git a/examples/qos_meter/rte_policer.c b/examples/qos_meter/rte_policer.c
new file mode 100644
index 00000000..35f5f1b2
--- /dev/null
+++ b/examples/qos_meter/rte_policer.c
@@ -0,0 +1,58 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include "rte_policer.h"
+
+int
+rte_phb_config(struct rte_phb *phb_table, uint32_t phb_table_index,
+ enum rte_meter_color pre_meter, enum rte_meter_color post_meter, enum rte_phb_action action)
+{
+ struct rte_phb *phb = NULL;
+
+ /* User argument checking */
+ if (phb_table == NULL) {
+ return -1;
+ }
+
+ if ((pre_meter > e_RTE_METER_RED) || (post_meter > e_RTE_METER_RED) || (pre_meter > post_meter)) {
+ return -2;
+ }
+
+ /* Set action in PHB table entry */
+ phb = &phb_table[phb_table_index];
+ phb->actions[pre_meter][post_meter] = action;
+
+
+ return 0;
+}
diff --git a/examples/qos_meter/rte_policer.h b/examples/qos_meter/rte_policer.h
new file mode 100644
index 00000000..d2bcafbf
--- /dev/null
+++ b/examples/qos_meter/rte_policer.h
@@ -0,0 +1,64 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __INCLUDE_RTE_POLICER_H__
+#define __INCLUDE_RTE_POLICER_H__
+
+#include <stdint.h>
+#include <rte_meter.h>
+
+enum rte_phb_action {
+ e_RTE_PHB_ACTION_GREEN = e_RTE_METER_GREEN,
+ e_RTE_PHB_ACTION_YELLOW = e_RTE_METER_YELLOW,
+ e_RTE_PHB_ACTION_RED = e_RTE_METER_RED,
+ e_RTE_PHB_ACTION_DROP = 3,
+};
+
+struct rte_phb {
+ enum rte_phb_action actions[e_RTE_METER_COLORS][e_RTE_METER_COLORS];
+};
+
+int
+rte_phb_config(struct rte_phb *phb_table, uint32_t phb_table_index,
+ enum rte_meter_color pre_meter, enum rte_meter_color post_meter, enum rte_phb_action action);
+
+static inline enum rte_phb_action
+policer_run(struct rte_phb *phb_table, uint32_t phb_table_index, enum rte_meter_color pre_meter, enum rte_meter_color post_meter)
+{
+ struct rte_phb *phb = &phb_table[phb_table_index];
+ enum rte_phb_action action = phb->actions[pre_meter][post_meter];
+
+ return action;
+}
+
+#endif
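
The PHB policer declared above can be exercised with a short sketch like the following (the table size and colour choices are illustrative):

    #include "rte_policer.h"

    static struct rte_phb phb_table[1];

    static void
    policer_example(void)
    {
            /* Drop packets that enter green but are metered red. */
            rte_phb_config(phb_table, 0, e_RTE_METER_GREEN, e_RTE_METER_RED,
                            e_RTE_PHB_ACTION_DROP);

            /* Per packet: look up the action for the (pre, post) colour pair. */
            enum rte_phb_action act = policer_run(phb_table, 0,
                            e_RTE_METER_GREEN, e_RTE_METER_RED);
            (void)act;
    }
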
diff --git a/examples/qos_sched/Makefile b/examples/qos_sched/Makefile
new file mode 100644
index 00000000..f59645f5
--- /dev/null
+++ b/examples/qos_sched/Makefile
@@ -0,0 +1,60 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = qos_sched
+
+# all source are stored in SRCS-y
+SRCS-y := main.c args.c init.c app_thread.c cfg_file.c cmdline.c stats.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS_args.o := -D_GNU_SOURCE
+CFLAGS_cfg_file.o := -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/qos_sched/app_thread.c b/examples/qos_sched/app_thread.c
new file mode 100644
index 00000000..3c678cc4
--- /dev/null
+++ b/examples/qos_sched/app_thread.c
@@ -0,0 +1,293 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+#include <rte_sched.h>
+
+#include "main.h"
+
+/*
+ * QoS parameters are encoded as follows:
+ * Outer VLAN ID defines subport
+ * Inner VLAN ID defines pipe
+ * Destination IP 0.0.XXX.0 defines traffic class
+ * Destination IP host (0.0.0.XXX) defines queue
+ * Values below define offset to each field from start of frame
+ */
+#define SUBPORT_OFFSET 7
+#define PIPE_OFFSET 9
+#define TC_OFFSET 20
+#define QUEUE_OFFSET 20
+#define COLOR_OFFSET 19
+
+static inline int
+get_pkt_sched(struct rte_mbuf *m, uint32_t *subport, uint32_t *pipe,
+ uint32_t *traffic_class, uint32_t *queue, uint32_t *color)
+{
+ uint16_t *pdata = rte_pktmbuf_mtod(m, uint16_t *);
+
+ *subport = (rte_be_to_cpu_16(pdata[SUBPORT_OFFSET]) & 0x0FFF) &
+ (port_params.n_subports_per_port - 1); /* Outer VLAN ID*/
+ *pipe = (rte_be_to_cpu_16(pdata[PIPE_OFFSET]) & 0x0FFF) &
+ (port_params.n_pipes_per_subport - 1); /* Inner VLAN ID */
+ *traffic_class = (pdata[QUEUE_OFFSET] & 0x0F) &
+ (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1); /* Destination IP */
+ *queue = ((pdata[QUEUE_OFFSET] >> 8) & 0x0F) &
+ (RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS - 1) ; /* Destination IP */
+ *color = pdata[COLOR_OFFSET] & 0x03; /* Destination IP */
+
+ return 0;
+}
+
+void
+app_rx_thread(struct thread_conf **confs)
+{
+ uint32_t i, nb_rx;
+ struct rte_mbuf *rx_mbufs[burst_conf.rx_burst] __rte_cache_aligned;
+ struct thread_conf *conf;
+ int conf_idx = 0;
+
+ uint32_t subport;
+ uint32_t pipe;
+ uint32_t traffic_class;
+ uint32_t queue;
+ uint32_t color;
+
+ while ((conf = confs[conf_idx])) {
+ nb_rx = rte_eth_rx_burst(conf->rx_port, conf->rx_queue, rx_mbufs,
+ burst_conf.rx_burst);
+
+ if (likely(nb_rx != 0)) {
+ APP_STATS_ADD(conf->stat.nb_rx, nb_rx);
+
+ for(i = 0; i < nb_rx; i++) {
+ get_pkt_sched(rx_mbufs[i],
+ &subport, &pipe, &traffic_class, &queue, &color);
+ rte_sched_port_pkt_write(rx_mbufs[i], subport, pipe,
+ traffic_class, queue, (enum rte_meter_color) color);
+ }
+
+ if (unlikely(rte_ring_sp_enqueue_bulk(conf->rx_ring,
+ (void **)rx_mbufs, nb_rx) != 0)) {
+ for(i = 0; i < nb_rx; i++) {
+ rte_pktmbuf_free(rx_mbufs[i]);
+
+ APP_STATS_ADD(conf->stat.nb_drop, 1);
+ }
+ }
+ }
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+
+/* Send the packets held in the output buffer to the output interface.
+ * Transmission is retried until every packet has been sent, since packets
+ * must not be dropped at this stage.
+ */
+
+static inline void
+app_send_burst(struct thread_conf *qconf)
+{
+ struct rte_mbuf **mbufs;
+ uint32_t n, ret;
+
+ mbufs = (struct rte_mbuf **)qconf->m_table;
+ n = qconf->n_mbufs;
+
+ do {
+ ret = rte_eth_tx_burst(qconf->tx_port, qconf->tx_queue, mbufs, (uint16_t)n);
+ /* we cannot drop the packets, so re-send */
+ /* update number of packets to be sent */
+ n -= ret;
+ mbufs = (struct rte_mbuf **)&mbufs[ret];
+ } while (n);
+}
+
+
+/* Send the packet to an output interface */
+static void
+app_send_packets(struct thread_conf *qconf, struct rte_mbuf **mbufs, uint32_t nb_pkt)
+{
+ uint32_t i, len;
+
+ len = qconf->n_mbufs;
+ for(i = 0; i < nb_pkt; i++) {
+ qconf->m_table[len] = mbufs[i];
+ len++;
+ /* enough pkts to be sent */
+ if (unlikely(len == burst_conf.tx_burst)) {
+ qconf->n_mbufs = len;
+ app_send_burst(qconf);
+ len = 0;
+ }
+ }
+
+ qconf->n_mbufs = len;
+}
+
+void
+app_tx_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.qos_dequeue];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+ int retval;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ while ((conf = confs[conf_idx])) {
+ retval = rte_ring_sc_dequeue_bulk(conf->tx_ring, (void **)mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(retval == 0)) {
+ app_send_packets(conf, mbufs, burst_conf.qos_dequeue);
+
+ conf->counter = 0; /* reset empty read loop counter */
+ }
+
+ conf->counter++;
+
+ /* drain ring and TX queues */
+ if (unlikely(conf->counter > drain_tsc)) {
+			/* now check if there are any packets left to be transmitted */
+ if (conf->n_mbufs != 0) {
+ app_send_burst(conf);
+
+ conf->n_mbufs = 0;
+ }
+ conf->counter = 0;
+ }
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+void
+app_worker_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.ring_burst];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+
+ while ((conf = confs[conf_idx])) {
+ uint32_t nb_pkt;
+ int retval;
+
+ /* Read packet from the ring */
+ retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+ burst_conf.ring_burst);
+ if (likely(retval == 0)) {
+ int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+ burst_conf.ring_burst);
+
+ APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+ APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+ }
+
+ nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(nb_pkt > 0))
+ while (rte_ring_sp_enqueue_bulk(conf->tx_ring, (void **)mbufs, nb_pkt) != 0);
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
+
+
+void
+app_mixed_thread(struct thread_conf **confs)
+{
+ struct rte_mbuf *mbufs[burst_conf.ring_burst];
+ struct thread_conf *conf;
+ int conf_idx = 0;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+
+ while ((conf = confs[conf_idx])) {
+ uint32_t nb_pkt;
+ int retval;
+
+ /* Read packet from the ring */
+ retval = rte_ring_sc_dequeue_bulk(conf->rx_ring, (void **)mbufs,
+ burst_conf.ring_burst);
+ if (likely(retval == 0)) {
+ int nb_sent = rte_sched_port_enqueue(conf->sched_port, mbufs,
+ burst_conf.ring_burst);
+
+ APP_STATS_ADD(conf->stat.nb_drop, burst_conf.ring_burst - nb_sent);
+ APP_STATS_ADD(conf->stat.nb_rx, burst_conf.ring_burst);
+ }
+
+
+ nb_pkt = rte_sched_port_dequeue(conf->sched_port, mbufs,
+ burst_conf.qos_dequeue);
+ if (likely(nb_pkt > 0)) {
+ app_send_packets(conf, mbufs, nb_pkt);
+
+ conf->counter = 0; /* reset empty read loop counter */
+ }
+
+ conf->counter++;
+
+ /* drain ring and TX queues */
+ if (unlikely(conf->counter > drain_tsc)) {
+
+			/* now check if there are any packets left to be transmitted */
+ if (conf->n_mbufs != 0) {
+ app_send_burst(conf);
+
+ conf->n_mbufs = 0;
+ }
+ conf->counter = 0;
+ }
+
+ conf_idx++;
+ if (confs[conf_idx] == NULL)
+ conf_idx = 0;
+ }
+}
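
To make the VLAN-based mapping in get_pkt_sched() concrete, a small standalone example of the subport/pipe masking (the subport and pipe counts are illustrative, not taken from a particular profile):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t n_subports = 8, n_pipes = 4096;
            uint16_t outer_vlan_id = 100;   /* outer VLAN tag -> subport */
            uint16_t inner_vlan_id = 300;   /* inner VLAN tag -> pipe */

            /* Same masking as get_pkt_sched(). */
            uint32_t subport = (outer_vlan_id & 0x0FFF) & (n_subports - 1); /* 4 */
            uint32_t pipe = (inner_vlan_id & 0x0FFF) & (n_pipes - 1);       /* 300 */

            printf("subport=%u pipe=%u\n", subport, pipe);
            return 0;
    }
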
diff --git a/examples/qos_sched/args.c b/examples/qos_sched/args.c
new file mode 100644
index 00000000..3e7fd087
--- /dev/null
+++ b/examples/qos_sched/args.c
@@ -0,0 +1,485 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <locale.h>
+#include <unistd.h>
+#include <limits.h>
+#include <getopt.h>
+
+#include <rte_log.h>
+#include <rte_eal.h>
+#include <rte_lcore.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+
+#define APP_NAME "qos_sched"
+#define MAX_OPT_VALUES 8
+#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u/topology/"
+
+static uint32_t app_master_core = 1;
+static uint32_t app_numa_mask;
+static uint64_t app_used_core_mask = 0;
+static uint64_t app_used_port_mask = 0;
+static uint64_t app_used_rx_port_mask = 0;
+static uint64_t app_used_tx_port_mask = 0;
+
+
+static const char usage[] =
+ " \n"
+ " %s <APP PARAMS> \n"
+ " \n"
+ "Application mandatory parameters: \n"
+ " --pfc \"RX PORT, TX PORT, RX LCORE, WT LCORE\" : Packet flow configuration \n"
+ " multiple pfc can be configured in command line \n"
+ " \n"
+ "Application optional parameters: \n"
+ " --i : run in interactive mode (default value is %u) \n"
+ " --mst I : master core index (default value is %u) \n"
+ " --rsz \"A, B, C\" : Ring sizes \n"
+ " A = Size (in number of buffer descriptors) of each of the NIC RX \n"
+ " rings read by the I/O RX lcores (default value is %u) \n"
+ " B = Size (in number of elements) of each of the SW rings used by the\n"
+ " I/O RX lcores to send packets to worker lcores (default value is\n"
+ " %u) \n"
+ " C = Size (in number of buffer descriptors) of each of the NIC TX \n"
+ " rings written by worker lcores (default value is %u) \n"
+ " --bsz \"A, B, C, D\": Burst sizes \n"
+ " A = I/O RX lcore read burst size from NIC RX (default value is %u) \n"
+ " B = I/O RX lcore write burst size to output SW rings, \n"
+ " Worker lcore read burst size from input SW rings, \n"
+ " QoS enqueue size (default value is %u) \n"
+ " C = QoS dequeue size (default value is %u) \n"
+ " D = Worker lcore write burst size to NIC TX (default value is %u) \n"
+ " --msz M : Mempool size (in number of mbufs) for each pfc (default %u) \n"
+ " --rth \"A, B, C\" : RX queue threshold parameters \n"
+ " A = RX prefetch threshold (default value is %u) \n"
+ " B = RX host threshold (default value is %u) \n"
+ " C = RX write-back threshold (default value is %u) \n"
+ " --tth \"A, B, C\" : TX queue threshold parameters \n"
+ " A = TX prefetch threshold (default value is %u) \n"
+ " B = TX host threshold (default value is %u) \n"
+ " C = TX write-back threshold (default value is %u) \n"
+ " --cfg FILE : profile configuration to load \n"
+;
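+
+/*
+ * Example invocation (illustrative port/lcore numbers and profile name only),
+ * following the --pfc format documented above: read port 0 on lcore 2,
+ * schedule and transmit on port 1 from lcore 3, master core left at its
+ * default (lcore 1):
+ *
+ *   ./qos_sched -c 0xe -n 4 -- --pfc "0,1,2,3" --cfg profile.cfg
+ */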
+
+/* display usage */
+static void
+app_usage(const char *prgname)
+{
+ printf(usage, prgname, APP_INTERACTIVE_DEFAULT, app_master_core,
+ APP_RX_DESC_DEFAULT, APP_RING_SIZE, APP_TX_DESC_DEFAULT,
+ MAX_PKT_RX_BURST, PKT_ENQUEUE, PKT_DEQUEUE,
+ MAX_PKT_TX_BURST, NB_MBUF,
+ RX_PTHRESH, RX_HTHRESH, RX_WTHRESH,
+ TX_PTHRESH, TX_HTHRESH, TX_WTHRESH
+ );
+}
+
+static inline int str_is(const char *str, const char *is)
+{
+ return strcmp(str, is) == 0;
+}
+
+/* returns core mask used by DPDK */
+static uint64_t
+app_eal_core_mask(void)
+{
+ uint32_t i;
+ uint64_t cm = 0;
+ struct rte_config *cfg = rte_eal_get_configuration();
+
+ for (i = 0; i < RTE_MAX_LCORE; i++) {
+ if (cfg->lcore_role[i] == ROLE_RTE)
+ cm |= (1ULL << i);
+ }
+
+ cm |= (1ULL << cfg->master_lcore);
+
+ return cm;
+}
+
+
+/* returns the total number of cores present in the system */
+static uint32_t
+app_cpu_core_count(void)
+{
+ int i, len;
+ char path[PATH_MAX];
+ uint32_t ncores = 0;
+
+ for(i = 0; i < RTE_MAX_LCORE; i++) {
+ len = snprintf(path, sizeof(path), SYS_CPU_DIR, i);
+ if (len <= 0 || (unsigned)len >= sizeof(path))
+ continue;
+
+ if (access(path, F_OK) == 0)
+ ncores++;
+ }
+
+ return ncores;
+}
+
+/* returns:
+ number of values parsed
+ -1 in case of error
+*/
+static int
+app_parse_opt_vals(const char *conf_str, char separator, uint32_t n_vals, uint32_t *opt_vals)
+{
+ char *string;
+ uint32_t i, n_tokens;
+ char *tokens[MAX_OPT_VALUES];
+
+ if (conf_str == NULL || opt_vals == NULL || n_vals == 0 || n_vals > MAX_OPT_VALUES)
+ return -1;
+
+ /* duplicate configuration string before splitting it to tokens */
+ string = strdup(conf_str);
+ if (string == NULL)
+ return -1;
+
+ n_tokens = rte_strsplit(string, strnlen(string, 32), tokens, n_vals, separator);
+
+ for(i = 0; i < n_tokens; i++) {
+ opt_vals[i] = (uint32_t)atol(tokens[i]);
+ }
+
+ free(string);
+
+ return n_tokens;
+}
+
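+/* parse the --rsz "A, B, C" option: NIC RX ring, SW ring and NIC TX ring sizes */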
+static int
+app_parse_ring_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ ring_conf.rx_size = vals[0];
+ ring_conf.ring_size = vals[1];
+ ring_conf.tx_size = vals[2];
+
+ return 0;
+}
+
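+/* parse the --rth "A, B, C" option: RX prefetch, host and write-back thresholds */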
+static int
+app_parse_rth_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ rx_thresh.pthresh = (uint8_t)vals[0];
+ rx_thresh.hthresh = (uint8_t)vals[1];
+ rx_thresh.wthresh = (uint8_t)vals[2];
+
+ return 0;
+}
+
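+/* parse the --tth "A, B, C" option: TX prefetch, host and write-back thresholds */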
+static int
+app_parse_tth_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[3];
+
+ ret = app_parse_opt_vals(conf_str, ',', 3, vals);
+ if (ret != 3)
+ return ret;
+
+ tx_thresh.pthresh = (uint8_t)vals[0];
+ tx_thresh.hthresh = (uint8_t)vals[1];
+ tx_thresh.wthresh = (uint8_t)vals[2];
+
+ return 0;
+}
+
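+/* parse one --pfc "RX PORT, TX PORT, RX LCORE, WT LCORE[, TX LCORE]" flow configuration */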
+static int
+app_parse_flow_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[6]; /* one spare slot so a sixth value is rejected without overflowing */
+ struct flow_conf *pconf;
+ uint64_t mask;
+
+ ret = app_parse_opt_vals(conf_str, ',', 6, vals);
+ if (ret < 4 || ret > 5)
+ return ret;
+
+ pconf = &qos_conf[nb_pfc];
+
+ pconf->rx_port = (uint8_t)vals[0];
+ pconf->tx_port = (uint8_t)vals[1];
+ pconf->rx_core = (uint8_t)vals[2];
+ pconf->wt_core = (uint8_t)vals[3];
+ if (ret == 5)
+ pconf->tx_core = (uint8_t)vals[4];
+ else
+ pconf->tx_core = pconf->wt_core;
+
+ if (pconf->rx_core == pconf->wt_core) {
+ RTE_LOG(ERR, APP, "pfc %u: rx thread and worker thread cannot share same core\n", nb_pfc);
+ return -1;
+ }
+
+ if (pconf->rx_port >= RTE_MAX_ETHPORTS) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid rx port %"PRIu8" index\n",
+ nb_pfc, pconf->rx_port);
+ return -1;
+ }
+ if (pconf->tx_port >= RTE_MAX_ETHPORTS) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid tx port %"PRIu8" index\n",
+ nb_pfc, pconf->tx_port);
+ return -1;
+ }
+
+ mask = 1lu << pconf->rx_port;
+ if (app_used_rx_port_mask & mask) {
+ RTE_LOG(ERR, APP, "pfc %u: rx port %"PRIu8" is used already\n",
+ nb_pfc, pconf->rx_port);
+ return -1;
+ }
+ app_used_rx_port_mask |= mask;
+ app_used_port_mask |= mask;
+
+ mask = 1lu << pconf->tx_port;
+ if (app_used_tx_port_mask & mask) {
+ RTE_LOG(ERR, APP, "pfc %u: tx port %"PRIu8" is used already\n",
+ nb_pfc, pconf->tx_port);
+ return -1;
+ }
+ app_used_tx_port_mask |= mask;
+ app_used_port_mask |= mask;
+
+ mask = 1lu << pconf->rx_core;
+ app_used_core_mask |= mask;
+
+ mask = 1lu << pconf->wt_core;
+ app_used_core_mask |= mask;
+
+ mask = 1lu << pconf->tx_core;
+ app_used_core_mask |= mask;
+
+ nb_pfc++;
+
+ return 0;
+}
+
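+/* parse the --bsz "A, B, C, D" option: NIC RX read, SW ring/QoS enqueue, QoS dequeue and NIC TX write burst sizes */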
+static int
+app_parse_burst_conf(const char *conf_str)
+{
+ int ret;
+ uint32_t vals[4];
+
+ ret = app_parse_opt_vals(conf_str, ',', 4, vals);
+ if (ret != 4)
+ return ret;
+
+ burst_conf.rx_burst = (uint16_t)vals[0];
+ burst_conf.ring_burst = (uint16_t)vals[1];
+ burst_conf.qos_dequeue = (uint16_t)vals[2];
+ burst_conf.tx_burst = (uint16_t)vals[3];
+
+ return 0;
+}
+
+/*
+ * Parses the command line arguments of the application, initializes the EAL,
+ * calculates the mask of cores used by the packet flow configurations and
+ * validates it against the core mask the EAL was started with
+ */
+int
+app_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ int option_index;
+ const char *optname;
+ char *prgname = argv[0];
+ uint32_t i, nb_lcores;
+
+ static struct option lgopts[] = {
+ { "pfc", 1, 0, 0 },
+ { "mst", 1, 0, 0 },
+ { "rsz", 1, 0, 0 },
+ { "bsz", 1, 0, 0 },
+ { "msz", 1, 0, 0 },
+ { "rth", 1, 0, 0 },
+ { "tth", 1, 0, 0 },
+ { "cfg", 1, 0, 0 },
+ { NULL, 0, 0, 0 }
+ };
+
+ /* initialize EAL first */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ argc -= ret;
+ argv += ret;
+
+ /* set en_US locale to print big numbers with ',' */
+ setlocale(LC_NUMERIC, "en_US.utf-8");
+
+ while ((opt = getopt_long(argc, argv, "i",
+ lgopts, &option_index)) != EOF) {
+
+ switch (opt) {
+ case 'i':
+ printf("Interactive-mode selected\n");
+ interactive = 1;
+ break;
+ /* long options */
+ case 0:
+ optname = lgopts[option_index].name;
+ if (str_is(optname, "pfc")) {
+ ret = app_parse_flow_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid packet flow configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "mst")) {
+ app_master_core = (uint32_t)atoi(optarg);
+ break;
+ }
+ if (str_is(optname, "rsz")) {
+ ret = app_parse_ring_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid ring configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "bsz")) {
+ ret = app_parse_burst_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid burst configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "msz")) {
+ mp_size = atoi(optarg);
+ if (mp_size <= 0) {
+ RTE_LOG(ERR, APP, "Invalid mempool size %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "rth")) {
+ ret = app_parse_rth_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid RX threshold configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "tth")) {
+ ret = app_parse_tth_conf(optarg);
+ if (ret) {
+ RTE_LOG(ERR, APP, "Invalid TX threshold configuration %s\n", optarg);
+ return -1;
+ }
+ break;
+ }
+ if (str_is(optname, "cfg")) {
+ cfg_profile = optarg;
+ break;
+ }
+ break;
+
+ default:
+ app_usage(prgname);
+ return -1;
+ }
+ }
+
+ /* the master core must not collide with any core used by the packet flows */
+ if (app_used_core_mask & (1ULL << app_master_core)) {
+ RTE_LOG(ERR, APP, "Master core index is not configured properly\n");
+ app_usage(prgname);
+ return -1;
+ }
+ app_used_core_mask |= 1ULL << app_master_core;
+
+ if ((app_used_core_mask != app_eal_core_mask()) ||
+ (app_master_core != rte_get_master_lcore())) {
+ RTE_LOG(ERR, APP, "EAL core mask not configured properly, must be %" PRIx64
+ " instead of %" PRIx64 "\n" , app_used_core_mask, app_eal_core_mask());
+ return -1;
+ }
+
+ if (nb_pfc == 0) {
+ RTE_LOG(ERR, APP, "Packet flow not configured!\n");
+ app_usage(prgname);
+ return -1;
+ }
+
+ /* sanity check for cores assignment */
+ nb_lcores = app_cpu_core_count();
+
+ for(i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].rx_core >= nb_lcores) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid RX lcore index %u\n", i + 1,
+ qos_conf[i].rx_core);
+ return -1;
+ }
+ if (qos_conf[i].wt_core >= nb_lcores) {
+ RTE_LOG(ERR, APP, "pfc %u: invalid WT lcore index %u\n", i + 1,
+ qos_conf[i].wt_core);
+ return -1;
+ }
+ uint32_t rx_sock = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ uint32_t wt_sock = rte_lcore_to_socket_id(qos_conf[i].wt_core);
+ if (rx_sock != wt_sock) {
+ RTE_LOG(ERR, APP, "pfc %u: RX and WT must be on the same socket\n", i + 1);
+ return -1;
+ }
+ app_numa_mask |= 1 << rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ }
+
+ return 0;
+}
diff --git a/examples/qos_sched/cfg_file.c b/examples/qos_sched/cfg_file.c
new file mode 100644
index 00000000..94a1a221
--- /dev/null
+++ b/examples/qos_sched/cfg_file.c
@@ -0,0 +1,342 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <rte_string_fns.h>
+#include <rte_sched.h>
+
+#include "cfg_file.h"
+#include "main.h"
+
+
+/** when we resize a file structure, how many extra entries
+ * for new sections do we add in */
+#define CFG_ALLOC_SECTION_BATCH 8
+/** when we resize a section structure, how many extra entries
+ * for new entries do we add in */
+#define CFG_ALLOC_ENTRY_BATCH 16
+
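+/*
+ * Illustrative sketch of the [port] section expected in the profile file
+ * (values are examples only; the entry names match the keys parsed below):
+ *
+ * [port]
+ * frame overhead = 24
+ * number of subports per port = 1
+ * number of pipes per subport = 4096
+ * queue sizes = 64 64 64 64
+ */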
+int
+cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port_params)
+{
+ const char *entry;
+ int j;
+
+ if (!cfg || !port_params)
+ return -1;
+
+ entry = rte_cfgfile_get_entry(cfg, "port", "frame overhead");
+ if (entry)
+ port_params->frame_overhead = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, "port", "number of subports per port");
+ if (entry)
+ port_params->n_subports_per_port = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, "port", "number of pipes per subport");
+ if (entry)
+ port_params->n_pipes_per_subport = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, "port", "queue sizes");
+ if (entry) {
+ char *next;
+
+ for(j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ port_params->qsize[j] = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+#ifdef RTE_SCHED_RED
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) {
+ char str[32];
+
+ /* Parse WRED min thresholds */
+ snprintf(str, sizeof(str), "tc %d wred min", j);
+ entry = rte_cfgfile_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].min_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED max thresholds */
+ snprintf(str, sizeof(str), "tc %d wred max", j);
+ entry = rte_cfgfile_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].max_th
+ = (uint16_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED inverse mark probabilities */
+ snprintf(str, sizeof(str), "tc %d wred inv prob", j);
+ entry = rte_cfgfile_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].maxp_inv
+ = (uint8_t)strtol(entry, &next, 10);
+
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+
+ /* Parse WRED EWMA filter weights */
+ snprintf(str, sizeof(str), "tc %d wred weight", j);
+ entry = rte_cfgfile_get_entry(cfg, "red", str);
+ if (entry) {
+ char *next;
+ int k;
+ /* for each packet colour (green, yellow, red) */
+ for (k = 0; k < e_RTE_METER_COLORS; k++) {
+ port_params->red_params[j][k].wq_log2
+ = (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ }
+#endif /* RTE_SCHED_RED */
+
+ return 0;
+}
+
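+/*
+ * Illustrative sketch of a pipe profile section (example values only), using
+ * the entry names read below:
+ *
+ * [pipe profile 0]
+ * tb rate = 305175
+ * tb size = 1000000
+ * tc 0 rate = 305175
+ * tc 1 rate = 305175
+ * tc 2 rate = 305175
+ * tc 3 rate = 305175
+ * tc period = 40
+ * tc 0 wrr weights = 1 1 1 1
+ */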
+int
+cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe_params)
+{
+ int i, j;
+ char *next;
+ const char *entry;
+ int profiles;
+
+ if (!cfg || !pipe_params)
+ return -1;
+
+ profiles = rte_cfgfile_num_sections(cfg, "pipe profile", sizeof("pipe profile") - 1);
+ port_params.n_pipe_profiles = profiles;
+
+ for (j = 0; j < profiles; j++) {
+ char pipe_name[32];
+ snprintf(pipe_name, sizeof(pipe_name), "pipe profile %d", j);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tb rate");
+ if (entry)
+ pipe_params[j].tb_rate = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tb size");
+ if (entry)
+ pipe_params[j].tb_size = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc period");
+ if (entry)
+ pipe_params[j].tc_period = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 0 rate");
+ if (entry)
+ pipe_params[j].tc_rate[0] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 1 rate");
+ if (entry)
+ pipe_params[j].tc_rate[1] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 2 rate");
+ if (entry)
+ pipe_params[j].tc_rate[2] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 rate");
+ if (entry)
+ pipe_params[j].tc_rate[3] = (uint32_t)atoi(entry);
+
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 oversubscription weight");
+ if (entry)
+ pipe_params[j].tc_ov_weight = (uint8_t)atoi(entry);
+#endif
+
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 0 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*0 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 1 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*1 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 2 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*2 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ entry = rte_cfgfile_get_entry(cfg, pipe_name, "tc 3 wrr weights");
+ if (entry) {
+ for(i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ pipe_params[j].wrr_weights[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE*3 + i] =
+ (uint8_t)strtol(entry, &next, 10);
+ if (next == NULL)
+ break;
+ entry = next;
+ }
+ }
+ }
+ return 0;
+}
+
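+/*
+ * Illustrative sketch of a subport section (example values only). Besides the
+ * rate entries read below, "pipe X-Y = Z" entries map a range of pipes to a
+ * pipe profile index:
+ *
+ * [subport 0]
+ * tb rate = 1250000000
+ * tb size = 1000000
+ * tc 0 rate = 1250000000
+ * tc period = 10
+ * pipe 0-4095 = 0
+ */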
+int
+cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport_params)
+{
+ const char *entry;
+ int i, j, k;
+
+ if (!cfg || !subport_params)
+ return -1;
+
+ memset(app_pipe_to_profile, -1, sizeof(app_pipe_to_profile));
+
+ for (i = 0; i < MAX_SCHED_SUBPORTS; i++) {
+ char sec_name[CFG_NAME_LEN];
+ snprintf(sec_name, sizeof(sec_name), "subport %d", i);
+
+ if (rte_cfgfile_has_section(cfg, sec_name)) {
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tb rate");
+ if (entry)
+ subport_params[i].tb_rate = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tb size");
+ if (entry)
+ subport_params[i].tb_size = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tc period");
+ if (entry)
+ subport_params[i].tc_period = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 0 rate");
+ if (entry)
+ subport_params[i].tc_rate[0] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 1 rate");
+ if (entry)
+ subport_params[i].tc_rate[1] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 2 rate");
+ if (entry)
+ subport_params[i].tc_rate[2] = (uint32_t)atoi(entry);
+
+ entry = rte_cfgfile_get_entry(cfg, sec_name, "tc 3 rate");
+ if (entry)
+ subport_params[i].tc_rate[3] = (uint32_t)atoi(entry);
+
+ int n_entries = rte_cfgfile_section_num_entries(cfg, sec_name);
+ struct rte_cfgfile_entry entries[n_entries];
+
+ rte_cfgfile_section_entries(cfg, sec_name, entries, n_entries);
+
+ for (j = 0; j < n_entries; j++) {
+ if (strncmp("pipe", entries[j].name, sizeof("pipe") - 1) == 0) {
+ int profile;
+ char *tokens[2] = {NULL, NULL};
+ int n_tokens;
+ int begin, end;
+
+ profile = atoi(entries[j].value);
+ n_tokens = rte_strsplit(&entries[j].name[sizeof("pipe")],
+ strnlen(entries[j].name, CFG_NAME_LEN), tokens, 2, '-');
+
+ begin = atoi(tokens[0]);
+ if (n_tokens == 2)
+ end = atoi(tokens[1]);
+ else
+ end = begin;
+
+ if (end >= MAX_SCHED_PIPES || begin > end)
+ return -1;
+
+ for (k = begin; k <= end; k++) {
+ char profile_name[CFG_NAME_LEN];
+
+ snprintf(profile_name, sizeof(profile_name),
+ "pipe profile %d", profile);
+ if (rte_cfgfile_has_section(cfg, profile_name))
+ app_pipe_to_profile[i][k] = profile;
+ else
+ rte_exit(EXIT_FAILURE, "Wrong pipe profile %s\n",
+ entries[j].value);
+
+ }
+ }
+ }
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/qos_sched/cfg_file.h b/examples/qos_sched/cfg_file.h
new file mode 100644
index 00000000..cc5a2cd5
--- /dev/null
+++ b/examples/qos_sched/cfg_file.h
@@ -0,0 +1,46 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __CFG_FILE_H__
+#define __CFG_FILE_H__
+
+#include <rte_sched.h>
+#include <rte_cfgfile.h>
+
+int cfg_load_port(struct rte_cfgfile *cfg, struct rte_sched_port_params *port);
+
+int cfg_load_pipe(struct rte_cfgfile *cfg, struct rte_sched_pipe_params *pipe);
+
+int cfg_load_subport(struct rte_cfgfile *cfg, struct rte_sched_subport_params *subport);
+
+#endif
diff --git a/examples/qos_sched/cmdline.c b/examples/qos_sched/cmdline.c
new file mode 100644
index 00000000..f79d5246
--- /dev/null
+++ b/examples/qos_sched/cmdline.c
@@ -0,0 +1,643 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <termios.h>
+#include <inttypes.h>
+#include <string.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "main.h"
+
+/* *** Help command with introduction. *** */
+struct cmd_help_result {
+ cmdline_fixed_string_t help;
+};
+
+static void cmd_help_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(
+ cl,
+ "\n"
+ "The following commands are currently available:\n\n"
+ "Control:\n"
+ " quit : Quit the application.\n"
+ "\nStatistics:\n"
+ " stats app : Show app statistics.\n"
+ " stats port X subport Y : Show stats of a specific subport.\n"
+ " stats port X subport Y pipe Z : Show stats of a specific pipe.\n"
+ "\nAverage queue size:\n"
+ " qavg port X subport Y : Show average queue size per subport.\n"
+ " qavg port X subport Y tc Z : Show average queue size per subport and TC.\n"
+ " qavg port X subport Y pipe Z : Show average queue size per pipe.\n"
+ " qavg port X subport Y pipe Z tc A : Show average queue size per pipe and TC.\n"
+ " qavg port X subport Y pipe Z tc A q B : Show average queue size of a specific queue.\n"
+ " qavg [n|period] X : Set number of times and period (us).\n\n"
+ );
+
+}
+
+cmdline_parse_token_string_t cmd_help_help =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_result, help, "help");
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_parsed,
+ .data = NULL,
+ .help_str = "show help",
+ .tokens = {
+ (void *)&cmd_help_help,
+ NULL,
+ },
+};
+
+/* *** QUIT *** */
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed,
+ .data = NULL,
+ .help_str = "exit application",
+ .tokens = {
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/* *** SET QAVG PARAMETERS *** */
+struct cmd_setqavg_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t param_string;
+ uint32_t number;
+};
+
+static void cmd_setqavg_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_setqavg_result *res = parsed_result;
+
+ if (!strcmp(res->param_string, "period"))
+ qavg_period = res->number;
+ else if (!strcmp(res->param_string, "n"))
+ qavg_ntimes = res->number;
+ else
+ printf("\nUnknown parameter.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_setqavg_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_setqavg_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_setqavg_param_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_setqavg_result, param_string,
+ "period#n");
+cmdline_parse_token_num_t cmd_setqavg_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_setqavg_result, number,
+ UINT32);
+
+cmdline_parse_inst_t cmd_setqavg = {
+ .f = cmd_setqavg_parsed,
+ .data = NULL,
+ .help_str = "Set qavg period (us) or number of samples.",
+ .tokens = {
+ (void *)&cmd_setqavg_qavg_string,
+ (void *)&cmd_setqavg_param_string,
+ (void *)&cmd_setqavg_number,
+ NULL,
+ },
+};
+
+/* *** SHOW APP STATS *** */
+struct cmd_appstats_result {
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t app_string;
+};
+
+static void cmd_appstats_parsed(__attribute__((unused)) void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ app_stat();
+}
+
+cmdline_parse_token_string_t cmd_appstats_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_appstats_result, stats_string,
+ "stats");
+cmdline_parse_token_string_t cmd_appstats_app_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_appstats_result, app_string,
+ "app");
+
+cmdline_parse_inst_t cmd_appstats = {
+ .f = cmd_appstats_parsed,
+ .data = NULL,
+ .help_str = "Show app stats.",
+ .tokens = {
+ (void *)&cmd_appstats_stats_string,
+ (void *)&cmd_appstats_app_string,
+ NULL,
+ },
+};
+
+/* *** SHOW SUBPORT STATS *** */
+struct cmd_subportstats_result {
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+};
+
+static void cmd_subportstats_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_subportstats_result *res = parsed_result;
+
+ if (subport_stat(res->port_number, res->subport_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_subportstats_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, stats_string,
+ "stats");
+cmdline_parse_token_string_t cmd_subportstats_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, port_string,
+ "port");
+cmdline_parse_token_string_t cmd_subportstats_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_subportstats_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_subportstats_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_subportstats_result, subport_number,
+ UINT32);
+cmdline_parse_token_num_t cmd_subportstats_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_subportstats_result, port_number,
+ UINT8);
+
+cmdline_parse_inst_t cmd_subportstats = {
+ .f = cmd_subportstats_parsed,
+ .data = NULL,
+ .help_str = "Show subport stats.",
+ .tokens = {
+ (void *)&cmd_subportstats_stats_string,
+ (void *)&cmd_subportstats_port_string,
+ (void *)&cmd_subportstats_port_number,
+ (void *)&cmd_subportstats_subport_string,
+ (void *)&cmd_subportstats_subport_number,
+ NULL,
+ },
+};
+
+/* *** SHOW PIPE STATS *** */
+struct cmd_pipestats_result {
+ cmdline_fixed_string_t stats_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+ cmdline_fixed_string_t pipe_string;
+ uint32_t pipe_number;
+};
+
+static void cmd_pipestats_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_pipestats_result *res = parsed_result;
+
+ if (pipe_stat(res->port_number, res->subport_number, res->pipe_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_pipestats_stats_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, stats_string,
+ "stats");
+cmdline_parse_token_string_t cmd_pipestats_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_pipestats_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_pipestats_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_pipestats_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, subport_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_pipestats_pipe_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_pipestats_result, pipe_string,
+ "pipe");
+cmdline_parse_token_num_t cmd_pipestats_pipe_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_pipestats_result, pipe_number,
+ UINT32);
+
+cmdline_parse_inst_t cmd_pipestats = {
+ .f = cmd_pipestats_parsed,
+ .data = NULL,
+ .help_str = "Show pipe stats.",
+ .tokens = {
+ (void *)&cmd_pipestats_stats_string,
+ (void *)&cmd_pipestats_port_string,
+ (void *)&cmd_pipestats_port_number,
+ (void *)&cmd_pipestats_subport_string,
+ (void *)&cmd_pipestats_subport_number,
+ (void *)&cmd_pipestats_pipe_string,
+ (void *)&cmd_pipestats_pipe_number,
+ NULL,
+ },
+};
+
+/* *** SHOW AVERAGE QUEUE SIZE (QUEUE) *** */
+struct cmd_avg_q_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+ cmdline_fixed_string_t pipe_string;
+ uint32_t pipe_number;
+ cmdline_fixed_string_t tc_string;
+ uint8_t tc_number;
+ cmdline_fixed_string_t q_string;
+ uint8_t q_number;
+};
+
+static void cmd_avg_q_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_avg_q_result *res = parsed_result;
+
+ if (qavg_q(res->port_number, res->subport_number, res->pipe_number, res->tc_number, res->q_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_avg_q_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_avg_q_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_avg_q_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_q_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_avg_q_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, subport_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_q_pipe_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, pipe_string,
+ "pipe");
+cmdline_parse_token_num_t cmd_avg_q_pipe_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, pipe_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_q_tc_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, tc_string,
+ "tc");
+cmdline_parse_token_num_t cmd_avg_q_tc_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, tc_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_q_q_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_q_result, q_string,
+ "q");
+cmdline_parse_token_num_t cmd_avg_q_q_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_q_result, q_number,
+ UINT8);
+
+cmdline_parse_inst_t cmd_avg_q = {
+ .f = cmd_avg_q_parsed,
+ .data = NULL,
+ .help_str = "Show average queue size of a specific queue.",
+ .tokens = {
+ (void *)&cmd_avg_q_qavg_string,
+ (void *)&cmd_avg_q_port_string,
+ (void *)&cmd_avg_q_port_number,
+ (void *)&cmd_avg_q_subport_string,
+ (void *)&cmd_avg_q_subport_number,
+ (void *)&cmd_avg_q_pipe_string,
+ (void *)&cmd_avg_q_pipe_number,
+ (void *)&cmd_avg_q_tc_string,
+ (void *)&cmd_avg_q_tc_number,
+ (void *)&cmd_avg_q_q_string,
+ (void *)&cmd_avg_q_q_number,
+ NULL,
+ },
+};
+
+/* *** SHOW AVERAGE QUEUE SIZE (tc/pipe) *** */
+struct cmd_avg_tcpipe_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+ cmdline_fixed_string_t pipe_string;
+ uint32_t pipe_number;
+ cmdline_fixed_string_t tc_string;
+ uint8_t tc_number;
+};
+
+static void cmd_avg_tcpipe_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_avg_tcpipe_result *res = parsed_result;
+
+ if (qavg_tcpipe(res->port_number, res->subport_number, res->pipe_number, res->tc_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_avg_tcpipe_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_avg_tcpipe_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_avg_tcpipe_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_tcpipe_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_avg_tcpipe_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, subport_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_tcpipe_pipe_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, pipe_string,
+ "pipe");
+cmdline_parse_token_num_t cmd_avg_tcpipe_pipe_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, pipe_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_tcpipe_tc_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcpipe_result, tc_string,
+ "tc");
+cmdline_parse_token_num_t cmd_avg_tcpipe_tc_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcpipe_result, tc_number,
+ UINT8);
+
+cmdline_parse_inst_t cmd_avg_tcpipe = {
+ .f = cmd_avg_tcpipe_parsed,
+ .data = NULL,
+ .help_str = "Show average queue size per pipe and TC.",
+ .tokens = {
+ (void *)&cmd_avg_tcpipe_qavg_string,
+ (void *)&cmd_avg_tcpipe_port_string,
+ (void *)&cmd_avg_tcpipe_port_number,
+ (void *)&cmd_avg_tcpipe_subport_string,
+ (void *)&cmd_avg_tcpipe_subport_number,
+ (void *)&cmd_avg_tcpipe_pipe_string,
+ (void *)&cmd_avg_tcpipe_pipe_number,
+ (void *)&cmd_avg_tcpipe_tc_string,
+ (void *)&cmd_avg_tcpipe_tc_number,
+ NULL,
+ },
+};
+
+/* *** SHOW AVERAGE QUEUE SIZE (pipe) *** */
+struct cmd_avg_pipe_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+ cmdline_fixed_string_t pipe_string;
+ uint32_t pipe_number;
+};
+
+static void cmd_avg_pipe_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_avg_pipe_result *res = parsed_result;
+
+ if (qavg_pipe(res->port_number, res->subport_number, res->pipe_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_avg_pipe_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_avg_pipe_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_avg_pipe_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_pipe_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_avg_pipe_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, subport_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_pipe_pipe_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_pipe_result, pipe_string,
+ "pipe");
+cmdline_parse_token_num_t cmd_avg_pipe_pipe_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_pipe_result, pipe_number,
+ UINT32);
+
+cmdline_parse_inst_t cmd_avg_pipe = {
+ .f = cmd_avg_pipe_parsed,
+ .data = NULL,
+ .help_str = "Show average queue size per pipe.",
+ .tokens = {
+ (void *)&cmd_avg_pipe_qavg_string,
+ (void *)&cmd_avg_pipe_port_string,
+ (void *)&cmd_avg_pipe_port_number,
+ (void *)&cmd_avg_pipe_subport_string,
+ (void *)&cmd_avg_pipe_subport_number,
+ (void *)&cmd_avg_pipe_pipe_string,
+ (void *)&cmd_avg_pipe_pipe_number,
+ NULL,
+ },
+};
+
+/* *** SHOW AVERAGE QUEUE SIZE (tc/subport) *** */
+struct cmd_avg_tcsubport_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+ cmdline_fixed_string_t tc_string;
+ uint8_t tc_number;
+};
+
+static void cmd_avg_tcsubport_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_avg_tcsubport_result *res = parsed_result;
+
+ if (qavg_tcsubport(res->port_number, res->subport_number, res->tc_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_avg_tcsubport_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_avg_tcsubport_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_avg_tcsubport_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_tcsubport_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_avg_tcsubport_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, subport_number,
+ UINT32);
+cmdline_parse_token_string_t cmd_avg_tcsubport_tc_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_tcsubport_result, tc_string,
+ "tc");
+cmdline_parse_token_num_t cmd_avg_tcsubport_tc_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_tcsubport_result, tc_number,
+ UINT8);
+
+cmdline_parse_inst_t cmd_avg_tcsubport = {
+ .f = cmd_avg_tcsubport_parsed,
+ .data = NULL,
+ .help_str = "Show average queue size per subport and TC.",
+ .tokens = {
+ (void *)&cmd_avg_tcsubport_qavg_string,
+ (void *)&cmd_avg_tcsubport_port_string,
+ (void *)&cmd_avg_tcsubport_port_number,
+ (void *)&cmd_avg_tcsubport_subport_string,
+ (void *)&cmd_avg_tcsubport_subport_number,
+ (void *)&cmd_avg_tcsubport_tc_string,
+ (void *)&cmd_avg_tcsubport_tc_number,
+ NULL,
+ },
+};
+
+/* *** SHOW AVERAGE QUEUE SIZE (subport) *** */
+struct cmd_avg_subport_result {
+ cmdline_fixed_string_t qavg_string;
+ cmdline_fixed_string_t port_string;
+ uint8_t port_number;
+ cmdline_fixed_string_t subport_string;
+ uint32_t subport_number;
+};
+
+static void cmd_avg_subport_parsed(void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_avg_subport_result *res = parsed_result;
+
+ if (qavg_subport(res->port_number, res->subport_number) < 0)
+ printf ("\nStats not available for these parameters. Check that both the port and subport are correct.\n\n");
+}
+
+cmdline_parse_token_string_t cmd_avg_subport_qavg_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, qavg_string,
+ "qavg");
+cmdline_parse_token_string_t cmd_avg_subport_port_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, port_string,
+ "port");
+cmdline_parse_token_num_t cmd_avg_subport_port_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_subport_result, port_number,
+ UINT8);
+cmdline_parse_token_string_t cmd_avg_subport_subport_string =
+ TOKEN_STRING_INITIALIZER(struct cmd_avg_subport_result, subport_string,
+ "subport");
+cmdline_parse_token_num_t cmd_avg_subport_subport_number =
+ TOKEN_NUM_INITIALIZER(struct cmd_avg_subport_result, subport_number,
+ UINT32);
+
+cmdline_parse_inst_t cmd_avg_subport = {
+ .f = cmd_avg_subport_parsed,
+ .data = NULL,
+ .help_str = "Show average queue size per subport.",
+ .tokens = {
+ (void *)&cmd_avg_subport_qavg_string,
+ (void *)&cmd_avg_subport_port_string,
+ (void *)&cmd_avg_subport_port_number,
+ (void *)&cmd_avg_subport_subport_string,
+ (void *)&cmd_avg_subport_subport_number,
+ NULL,
+ },
+};
+
+/* ******************************************************************************** */
+
+/* list of instructions */
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_help,
+ (cmdline_parse_inst_t *)&cmd_setqavg,
+ (cmdline_parse_inst_t *)&cmd_appstats,
+ (cmdline_parse_inst_t *)&cmd_subportstats,
+ (cmdline_parse_inst_t *)&cmd_pipestats,
+ (cmdline_parse_inst_t *)&cmd_avg_q,
+ (cmdline_parse_inst_t *)&cmd_avg_tcpipe,
+ (cmdline_parse_inst_t *)&cmd_avg_pipe,
+ (cmdline_parse_inst_t *)&cmd_avg_tcsubport,
+ (cmdline_parse_inst_t *)&cmd_avg_subport,
+ (cmdline_parse_inst_t *)&cmd_quit,
+ NULL,
+};
+
+/* prompt function, called from main on MASTER lcore */
+void
+prompt(void)
+{
+ struct cmdline *cl;
+
+ cl = cmdline_stdin_new(main_ctx, "qos_sched> ");
+ if (cl == NULL) {
+ return;
+ }
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+}
diff --git a/examples/qos_sched/init.c b/examples/qos_sched/init.c
new file mode 100644
index 00000000..70e12bb4
--- /dev/null
+++ b/examples/qos_sched/init.c
@@ -0,0 +1,370 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <memory.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_debug.h>
+#include <rte_ethdev.h>
+#include <rte_mempool.h>
+#include <rte_sched.h>
+#include <rte_cycles.h>
+#include <rte_string_fns.h>
+#include <rte_cfgfile.h>
+
+#include "main.h"
+#include "cfg_file.h"
+
+uint32_t app_numa_mask = 0;
+static uint32_t app_inited_port_mask = 0;
+
+int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+#define MAX_NAME_LEN 32
+
+struct ring_conf ring_conf = {
+ .rx_size = APP_RX_DESC_DEFAULT,
+ .ring_size = APP_RING_SIZE,
+ .tx_size = APP_TX_DESC_DEFAULT,
+};
+
+struct burst_conf burst_conf = {
+ .rx_burst = MAX_PKT_RX_BURST,
+ .ring_burst = PKT_ENQUEUE,
+ .qos_dequeue = PKT_DEQUEUE,
+ .tx_burst = MAX_PKT_TX_BURST,
+};
+
+struct ring_thresh rx_thresh = {
+ .pthresh = RX_PTHRESH,
+ .hthresh = RX_HTHRESH,
+ .wthresh = RX_WTHRESH,
+};
+
+struct ring_thresh tx_thresh = {
+ .pthresh = TX_PTHRESH,
+ .hthresh = TX_HTHRESH,
+ .wthresh = TX_WTHRESH,
+};
+
+uint32_t nb_pfc;
+const char *cfg_profile = NULL;
+int mp_size = NB_MBUF;
+struct flow_conf qos_conf[MAX_DATA_STREAMS];
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .max_rx_pkt_len = ETHER_MAX_LEN,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+ .txmode = {
+ .mq_mode = ETH_DCB_NONE,
+ },
+};
+
+static int
+app_init_port(uint8_t portid, struct rte_mempool *mp)
+{
+ int ret;
+ struct rte_eth_link link;
+ struct rte_eth_rxconf rx_conf;
+ struct rte_eth_txconf tx_conf;
+
+ /* check if port already initialized (multistream configuration) */
+ if (app_inited_port_mask & (1u << portid))
+ return 0;
+
+ rx_conf.rx_thresh.pthresh = rx_thresh.pthresh;
+ rx_conf.rx_thresh.hthresh = rx_thresh.hthresh;
+ rx_conf.rx_thresh.wthresh = rx_thresh.wthresh;
+ rx_conf.rx_free_thresh = 32;
+ rx_conf.rx_drop_en = 0;
+
+ tx_conf.tx_thresh.pthresh = tx_thresh.pthresh;
+ tx_conf.tx_thresh.hthresh = tx_thresh.hthresh;
+ tx_conf.tx_thresh.wthresh = tx_thresh.wthresh;
+ tx_conf.tx_free_thresh = 0;
+ tx_conf.tx_rs_thresh = 0;
+ tx_conf.txq_flags = ETH_TXQ_FLAGS_NOMULTSEGS | ETH_TXQ_FLAGS_NOOFFLOADS;
+
+ /* init port */
+ RTE_LOG(INFO, APP, "Initializing port %"PRIu8"... ", portid);
+ fflush(stdout);
+ ret = rte_eth_dev_configure(portid, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure device: "
+ "err=%d, port=%"PRIu8"\n", ret, portid);
+
+ /* init one RX queue */
+ fflush(stdout);
+ ret = rte_eth_rx_queue_setup(portid, 0, (uint16_t)ring_conf.rx_size,
+ rte_eth_dev_socket_id(portid), &rx_conf, mp);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_rx_queue_setup: "
+ "err=%d, port=%"PRIu8"\n", ret, portid);
+
+ /* init one TX queue */
+ fflush(stdout);
+ ret = rte_eth_tx_queue_setup(portid, 0,
+ (uint16_t)ring_conf.tx_size, rte_eth_dev_socket_id(portid), &tx_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_tx_queue_setup: err=%d, "
+ "port=%"PRIu8" queue=%d\n", ret, portid, 0);
+
+ /* Start device */
+ ret = rte_eth_dev_start(portid);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "rte_eth_dev_start: "
+ "err=%d, port=%"PRIu8"\n", ret, portid);
+
+ printf("done: ");
+
+ /* get link status */
+ rte_eth_link_get(portid, &link);
+ if (link.link_status) {
+ printf(" Link Up - speed %u Mbps - %s\n",
+ (uint32_t) link.link_speed,
+ (link.link_duplex == ETH_LINK_FULL_DUPLEX) ?
+ ("full-duplex") : ("half-duplex"));
+ } else {
+ printf(" Link Down\n");
+ }
+ rte_eth_promiscuous_enable(portid);
+
+ /* mark port as initialized */
+ app_inited_port_mask |= 1u << portid;
+
+ return 0;
+}
+
+static struct rte_sched_subport_params subport_params[MAX_SCHED_SUBPORTS] = {
+ {
+ .tb_rate = 1250000000,
+ .tb_size = 1000000,
+
+ .tc_rate = {1250000000, 1250000000, 1250000000, 1250000000},
+ .tc_period = 10,
+ },
+};
+
+static struct rte_sched_pipe_params pipe_profiles[RTE_SCHED_PIPE_PROFILES_PER_PORT] = {
+ { /* Profile #0 */
+ .tb_rate = 305175,
+ .tb_size = 1000000,
+
+ .tc_rate = {305175, 305175, 305175, 305175},
+ .tc_period = 40,
+#ifdef RTE_SCHED_SUBPORT_TC_OV
+ .tc_ov_weight = 1,
+#endif
+
+ .wrr_weights = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
+ },
+};
+
+struct rte_sched_port_params port_params = {
+ .name = "port_scheduler_0",
+ .socket = 0, /* computed */
+ .rate = 0, /* computed */
+ .mtu = 6 + 6 + 4 + 4 + 2 + 1500,
+ .frame_overhead = RTE_SCHED_FRAME_OVERHEAD_DEFAULT,
+ .n_subports_per_port = 1,
+ .n_pipes_per_subport = 4096,
+ .qsize = {64, 64, 64, 64},
+ .pipe_profiles = pipe_profiles,
+ .n_pipe_profiles = sizeof(pipe_profiles) / sizeof(struct rte_sched_pipe_params),
+
+#ifdef RTE_SCHED_RED
+ .red_params = {
+ /* Traffic Class 0 Colors Green / Yellow / Red */
+ [0][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [0][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [0][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 1 - Colors Green / Yellow / Red */
+ [1][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [1][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [1][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 2 - Colors Green / Yellow / Red */
+ [2][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [2][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [2][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+
+ /* Traffic Class 3 - Colors Green / Yellow / Red */
+ [3][0] = {.min_th = 48, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [3][1] = {.min_th = 40, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9},
+ [3][2] = {.min_th = 32, .max_th = 64, .maxp_inv = 10, .wq_log2 = 9}
+ }
+#endif /* RTE_SCHED_RED */
+};
+
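+/*
+ * Build the hierarchical scheduler for one output port: configure the port
+ * itself, then each subport, then every pipe that the profile file mapped to
+ * a pipe profile (app_pipe_to_profile entries left at -1 stay unconfigured).
+ */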
+static struct rte_sched_port *
+app_init_sched_port(uint32_t portid, uint32_t socketid)
+{
+ static char port_name[32]; /* static as referenced from global port_params*/
+ struct rte_eth_link link;
+ struct rte_sched_port *port = NULL;
+ uint32_t pipe, subport;
+ int err;
+
+ rte_eth_link_get((uint8_t)portid, &link);
+
+ port_params.socket = socketid;
+ port_params.rate = (uint64_t) link.link_speed * 1000 * 1000 / 8;
+ snprintf(port_name, sizeof(port_name), "port_%d", portid);
+ port_params.name = port_name;
+
+ port = rte_sched_port_config(&port_params);
+ if (port == NULL){
+ rte_exit(EXIT_FAILURE, "Unable to config sched port\n");
+ }
+
+ for (subport = 0; subport < port_params.n_subports_per_port; subport ++) {
+ err = rte_sched_subport_config(port, subport, &subport_params[subport]);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Unable to config sched subport %u, err=%d\n",
+ subport, err);
+ }
+
+ for (pipe = 0; pipe < port_params.n_pipes_per_subport; pipe ++) {
+ if (app_pipe_to_profile[subport][pipe] != -1) {
+ err = rte_sched_pipe_config(port, subport, pipe,
+ app_pipe_to_profile[subport][pipe]);
+ if (err) {
+ rte_exit(EXIT_FAILURE, "Unable to config sched pipe %u "
+ "for profile %d, err=%d\n", pipe,
+ app_pipe_to_profile[subport][pipe], err);
+ }
+ }
+ }
+ }
+
+ return port;
+}
+
+static int
+app_load_cfg_profile(const char *profile)
+{
+ if (profile == NULL)
+ return 0;
+ struct rte_cfgfile *file = rte_cfgfile_load(profile, 0);
+ if (file == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot load configuration profile %s\n", profile);
+
+ cfg_load_port(file, &port_params);
+ cfg_load_subport(file, subport_params);
+ cfg_load_pipe(file, pipe_profiles);
+
+ rte_cfgfile_close(file);
+
+ return 0;
+}
+
+int app_init(void)
+{
+ uint32_t i;
+ char ring_name[MAX_NAME_LEN];
+ char pool_name[MAX_NAME_LEN];
+
+ if (rte_eth_dev_count() == 0)
+ rte_exit(EXIT_FAILURE, "No Ethernet port - bye\n");
+
+ /* load configuration profile */
+ if (app_load_cfg_profile(cfg_profile) != 0)
+ rte_exit(EXIT_FAILURE, "Invalid configuration profile\n");
+
+ /* Initialize each active flow */
+ for(i = 0; i < nb_pfc; i++) {
+ uint32_t socket = rte_lcore_to_socket_id(qos_conf[i].rx_core);
+ struct rte_ring *ring;
+
+ snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].rx_core);
+ ring = rte_ring_lookup(ring_name);
+ if (ring == NULL)
+ qos_conf[i].rx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+ socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ else
+ qos_conf[i].rx_ring = ring;
+
+ snprintf(ring_name, MAX_NAME_LEN, "ring-%u-%u", i, qos_conf[i].tx_core);
+ ring = rte_ring_lookup(ring_name);
+ if (ring == NULL)
+ qos_conf[i].tx_ring = rte_ring_create(ring_name, ring_conf.ring_size,
+ socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ else
+ qos_conf[i].tx_ring = ring;
+
+
+ /* create the mbuf pools for each RX Port */
+ snprintf(pool_name, MAX_NAME_LEN, "mbuf_pool%u", i);
+ qos_conf[i].mbuf_pool = rte_pktmbuf_pool_create(pool_name,
+ mp_size, burst_conf.rx_burst * 4, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE,
+ rte_eth_dev_socket_id(qos_conf[i].rx_port));
+ if (qos_conf[i].mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot init mbuf pool for socket %u\n", i);
+
+ app_init_port(qos_conf[i].rx_port, qos_conf[i].mbuf_pool);
+ app_init_port(qos_conf[i].tx_port, qos_conf[i].mbuf_pool);
+
+ qos_conf[i].sched_port = app_init_sched_port(qos_conf[i].tx_port, socket);
+ }
+
+ RTE_LOG(INFO, APP, "time stamp clock running at %" PRIu64 " Hz\n",
+ rte_get_timer_hz());
+
+ RTE_LOG(INFO, APP, "Ring sizes: NIC RX = %u, Mempool = %d, SW queue = %u, "
+ "NIC TX = %u\n", ring_conf.rx_size, mp_size, ring_conf.ring_size,
+ ring_conf.tx_size);
+
+ RTE_LOG(INFO, APP, "Burst sizes: RX read = %hu, RX write = %hu,\n"
+ " Worker read/QoS enqueue = %hu,\n"
+ " QoS dequeue = %hu, Worker write = %hu\n",
+ burst_conf.rx_burst, burst_conf.ring_burst, burst_conf.ring_burst,
+ burst_conf.qos_dequeue, burst_conf.tx_burst);
+
+ RTE_LOG(INFO, APP, "NIC thresholds RX (p = %hhu, h = %hhu, w = %hhu), "
+ "TX (p = %hhu, h = %hhu, w = %hhu)\n",
+ rx_thresh.pthresh, rx_thresh.hthresh, rx_thresh.wthresh,
+ tx_thresh.pthresh, tx_thresh.hthresh, tx_thresh.wthresh);
+
+ return 0;
+}
diff --git a/examples/qos_sched/main.c b/examples/qos_sched/main.c
new file mode 100644
index 00000000..e16b164d
--- /dev/null
+++ b/examples/qos_sched/main.c
@@ -0,0 +1,254 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdint.h>
+
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_memcpy.h>
+#include <rte_byteorder.h>
+#include <rte_branch_prediction.h>
+
+#include <rte_sched.h>
+
+#include "main.h"
+
+#define APP_MODE_NONE 0
+#define APP_RX_MODE 1
+#define APP_WT_MODE 2
+#define APP_TX_MODE 4
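+/* The mode values are bit flags: one lcore may combine WT and TX duties, but combining RX and WT on the same lcore is rejected in app_main_loop(). */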
+
+uint8_t interactive = APP_INTERACTIVE_DEFAULT;
+uint32_t qavg_period = APP_QAVG_PERIOD;
+uint32_t qavg_ntimes = APP_QAVG_NTIMES;
+
+/* main processing loop */
+static int
+app_main_loop(__attribute__((unused))void *dummy)
+{
+ uint32_t lcore_id;
+ uint32_t i, mode;
+ uint32_t rx_idx = 0;
+ uint32_t wt_idx = 0;
+ uint32_t tx_idx = 0;
+ struct thread_conf *rx_confs[MAX_DATA_STREAMS];
+ struct thread_conf *wt_confs[MAX_DATA_STREAMS];
+ struct thread_conf *tx_confs[MAX_DATA_STREAMS];
+
+ memset(rx_confs, 0, sizeof(rx_confs));
+ memset(wt_confs, 0, sizeof(wt_confs));
+ memset(tx_confs, 0, sizeof(tx_confs));
+
+
+ mode = APP_MODE_NONE;
+ lcore_id = rte_lcore_id();
+
+ for (i = 0; i < nb_pfc; i++) {
+ struct flow_conf *flow = &qos_conf[i];
+
+ if (flow->rx_core == lcore_id) {
+ flow->rx_thread.rx_port = flow->rx_port;
+ flow->rx_thread.rx_ring = flow->rx_ring;
+ flow->rx_thread.rx_queue = flow->rx_queue;
+
+ rx_confs[rx_idx++] = &flow->rx_thread;
+
+ mode |= APP_RX_MODE;
+ }
+ if (flow->tx_core == lcore_id) {
+ flow->tx_thread.tx_port = flow->tx_port;
+ flow->tx_thread.tx_ring = flow->tx_ring;
+ flow->tx_thread.tx_queue = flow->tx_queue;
+
+ tx_confs[tx_idx++] = &flow->tx_thread;
+
+ mode |= APP_TX_MODE;
+ }
+ if (flow->wt_core == lcore_id) {
+ flow->wt_thread.rx_ring = flow->rx_ring;
+ flow->wt_thread.tx_ring = flow->tx_ring;
+ flow->wt_thread.tx_port = flow->tx_port;
+ flow->wt_thread.sched_port = flow->sched_port;
+
+ wt_confs[wt_idx++] = &flow->wt_thread;
+
+ mode |= APP_WT_MODE;
+ }
+ }
+
+ if (mode == APP_MODE_NONE) {
+ RTE_LOG(INFO, APP, "lcore %u has nothing to do\n", lcore_id);
+ return -1;
+ }
+
+ if (mode == (APP_RX_MODE | APP_WT_MODE)) {
+ RTE_LOG(INFO, APP, "lcore %u was configured for both RX and WT !!!\n",
+ lcore_id);
+ return -1;
+ }
+
+ RTE_LOG(INFO, APP, "entering main loop on lcore %u\n", lcore_id);
+ /* initialize mbuf memory */
+ if (mode == APP_RX_MODE) {
+ for (i = 0; i < rx_idx; i++) {
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u "
+ "reading port %"PRIu8"\n",
+ i, lcore_id, rx_confs[i]->rx_port);
+ }
+
+ app_rx_thread(rx_confs);
+ }
+ else if (mode == (APP_TX_MODE | APP_WT_MODE)) {
+ for (i = 0; i < wt_idx; i++) {
+ wt_confs[i]->m_table = rte_malloc("table_wt", sizeof(struct rte_mbuf *)
+ * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
+
+ if (wt_confs[i]->m_table == NULL)
+ rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u sched+write "
+ "port %"PRIu8"\n",
+ i, lcore_id, wt_confs[i]->tx_port);
+ }
+
+ app_mixed_thread(wt_confs);
+ }
+ else if (mode == APP_TX_MODE) {
+ for (i = 0; i < tx_idx; i++) {
+ tx_confs[i]->m_table = rte_malloc("table_tx", sizeof(struct rte_mbuf *)
+ * burst_conf.tx_burst, RTE_CACHE_LINE_SIZE);
+
+ if (tx_confs[i]->m_table == NULL)
+ rte_panic("flow %u unable to allocate memory buffer\n", i);
+
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u "
+ "writing port %"PRIu8"\n",
+ i, lcore_id, tx_confs[i]->tx_port);
+ }
+
+ app_tx_thread(tx_confs);
+ }
+ else if (mode == APP_WT_MODE){
+ for (i = 0; i < wt_idx; i++) {
+ RTE_LOG(INFO, APP, "flow %u lcoreid %u scheduling \n", i, lcore_id);
+ }
+
+ app_worker_thread(wt_confs);
+ }
+
+ return 0;
+}
+
+void
+app_stat(void)
+{
+ uint32_t i;
+ struct rte_eth_stats stats;
+ static struct rte_eth_stats rx_stats[MAX_DATA_STREAMS];
+ static struct rte_eth_stats tx_stats[MAX_DATA_STREAMS];
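+ /* The static snapshots make each call print the delta since the previous call instead of cumulative totals. */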
+
+ /* print statistics */
+ for(i = 0; i < nb_pfc; i++) {
+ struct flow_conf *flow = &qos_conf[i];
+
+ rte_eth_stats_get(flow->rx_port, &stats);
+ printf("\nRX port %"PRIu8": rx: %"PRIu64 " err: %"PRIu64
+ " no_mbuf: %"PRIu64 "\n",
+ flow->rx_port,
+ stats.ipackets - rx_stats[i].ipackets,
+ stats.ierrors - rx_stats[i].ierrors,
+ stats.rx_nombuf - rx_stats[i].rx_nombuf);
+ memcpy(&rx_stats[i], &stats, sizeof(stats));
+
+ rte_eth_stats_get(flow->tx_port, &stats);
+ printf("TX port %"PRIu8": tx: %" PRIu64 " err: %" PRIu64 "\n",
+ flow->tx_port,
+ stats.opackets - tx_stats[i].opackets,
+ stats.oerrors - tx_stats[i].oerrors);
+ memcpy(&tx_stats[i], &stats, sizeof(stats));
+
+ //printf("MP = %d\n", rte_mempool_count(conf->app_pktmbuf_pool));
+
+#if APP_COLLECT_STAT
+ printf("-------+------------+------------+\n");
+ printf(" | received | dropped |\n");
+ printf("-------+------------+------------+\n");
+ printf(" RX | %10" PRIu64 " | %10" PRIu64 " |\n",
+ flow->rx_thread.stat.nb_rx,
+ flow->rx_thread.stat.nb_drop);
+ printf("QOS+TX | %10" PRIu64 " | %10" PRIu64 " | pps: %"PRIu64 " \n",
+ flow->wt_thread.stat.nb_rx,
+ flow->wt_thread.stat.nb_drop,
+ flow->wt_thread.stat.nb_rx - flow->wt_thread.stat.nb_drop);
+ printf("-------+------------+------------+\n");
+
+ memset(&flow->rx_thread.stat, 0, sizeof(struct thread_stat));
+ memset(&flow->wt_thread.stat, 0, sizeof(struct thread_stat));
+#endif
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+
+ ret = app_parse_args(argc, argv);
+ if (ret < 0)
+ return -1;
+
+ ret = app_init();
+ if (ret < 0)
+ return -1;
+
+ /* launch per-lcore init on every lcore */
+ rte_eal_mp_remote_launch(app_main_loop, NULL, SKIP_MASTER);
+
+ if (interactive) {
+ sleep(1);
+ prompt();
+ }
+ else {
+ /* print statistics every second */
+ while(1) {
+ sleep(1);
+ app_stat();
+ }
+ }
+
+ return 0;
+}
diff --git a/examples/qos_sched/main.h b/examples/qos_sched/main.h
new file mode 100644
index 00000000..82aa0fae
--- /dev/null
+++ b/examples/qos_sched/main.h
@@ -0,0 +1,195 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <rte_sched.h>
+
+#define RTE_LOGTYPE_APP RTE_LOGTYPE_USER1
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define APP_INTERACTIVE_DEFAULT 0
+
+#define APP_RX_DESC_DEFAULT 128
+#define APP_TX_DESC_DEFAULT 256
+
+#define APP_RING_SIZE (8*1024)
+#define NB_MBUF (2*1024*1024)
+
+#define MAX_PKT_RX_BURST 64
+#define PKT_ENQUEUE 64
+#define PKT_DEQUEUE 32
+#define MAX_PKT_TX_BURST 64
+
+#define RX_PTHRESH 8 /**< Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /**< Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /**< Default values of RX write-back threshold reg. */
+
+#define TX_PTHRESH 36 /**< Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0 /**< Default values of TX host threshold reg. */
+#define TX_WTHRESH 0 /**< Default values of TX write-back threshold reg. */
+
+#define BURST_TX_DRAIN_US 100
+
+#define MAX_DATA_STREAMS (RTE_MAX_LCORE/2)
+#define MAX_SCHED_SUBPORTS 8
+#define MAX_SCHED_PIPES 4096
+
+#ifndef APP_COLLECT_STAT
+#define APP_COLLECT_STAT 1
+#endif
+
+#if APP_COLLECT_STAT
+#define APP_STATS_ADD(stat,val) (stat) += (val)
+#else
+#define APP_STATS_ADD(stat,val) do {(void) (val);} while (0)
+#endif
+
+#define APP_QAVG_NTIMES 10
+#define APP_QAVG_PERIOD 100
+
+struct thread_stat
+{
+ uint64_t nb_rx;
+ uint64_t nb_drop;
+};
+
+
+struct thread_conf
+{
+ uint32_t counter;
+ uint32_t n_mbufs;
+ struct rte_mbuf **m_table;
+
+ uint8_t rx_port;
+ uint8_t tx_port;
+ uint16_t rx_queue;
+ uint16_t tx_queue;
+ struct rte_ring *rx_ring;
+ struct rte_ring *tx_ring;
+ struct rte_sched_port *sched_port;
+
+#if APP_COLLECT_STAT
+ struct thread_stat stat;
+#endif
+} __rte_cache_aligned;
+
+
+struct flow_conf
+{
+ uint32_t rx_core;
+ uint32_t wt_core;
+ uint32_t tx_core;
+ uint8_t rx_port;
+ uint8_t tx_port;
+ uint16_t rx_queue;
+ uint16_t tx_queue;
+ struct rte_ring *rx_ring;
+ struct rte_ring *tx_ring;
+ struct rte_sched_port *sched_port;
+ struct rte_mempool *mbuf_pool;
+
+ struct thread_conf rx_thread;
+ struct thread_conf wt_thread;
+ struct thread_conf tx_thread;
+};
+
+
+struct ring_conf
+{
+ uint32_t rx_size;
+ uint32_t ring_size;
+ uint32_t tx_size;
+};
+
+struct burst_conf
+{
+ uint16_t rx_burst;
+ uint16_t ring_burst;
+ uint16_t qos_dequeue;
+ uint16_t tx_burst;
+};
+
+struct ring_thresh
+{
+ uint8_t pthresh; /**< Ring prefetch threshold. */
+ uint8_t hthresh; /**< Ring host threshold. */
+ uint8_t wthresh; /**< Ring writeback threshold. */
+};
+
+extern uint8_t interactive;
+extern uint32_t qavg_period;
+extern uint32_t qavg_ntimes;
+extern uint32_t nb_pfc;
+extern const char *cfg_profile;
+extern int mp_size;
+extern struct flow_conf qos_conf[];
+extern int app_pipe_to_profile[MAX_SCHED_SUBPORTS][MAX_SCHED_PIPES];
+
+extern struct ring_conf ring_conf;
+extern struct burst_conf burst_conf;
+extern struct ring_thresh rx_thresh;
+extern struct ring_thresh tx_thresh;
+
+extern struct rte_sched_port_params port_params;
+
+int app_parse_args(int argc, char **argv);
+int app_init(void);
+
+void prompt(void);
+void app_rx_thread(struct thread_conf **qconf);
+void app_tx_thread(struct thread_conf **qconf);
+void app_worker_thread(struct thread_conf **qconf);
+void app_mixed_thread(struct thread_conf **qconf);
+
+void app_stat(void);
+int subport_stat(uint8_t port_id, uint32_t subport_id);
+int pipe_stat(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id);
+int qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc, uint8_t q);
+int qavg_tcpipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc);
+int qavg_pipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id);
+int qavg_tcsubport(uint8_t port_id, uint32_t subport_id, uint8_t tc);
+int qavg_subport(uint8_t port_id, uint32_t subport_id);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _MAIN_H_ */
diff --git a/examples/qos_sched/profile.cfg b/examples/qos_sched/profile.cfg
new file mode 100644
index 00000000..f5b704cc
--- /dev/null
+++ b/examples/qos_sched/profile.cfg
@@ -0,0 +1,104 @@
+; BSD LICENSE
+;
+; Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; This file enables the following hierarchical scheduler configuration for each
+; 10GbE output port:
+; * Single subport (subport 0):
+; - Subport rate set to 100% of port rate
+; - Each of the 4 traffic classes has rate set to 100% of port rate
+; * 4K pipes per subport 0 (pipes 0 .. 4095) with identical configuration:
+; - Pipe rate set to 1/4K of port rate
+; - Each of the 4 traffic classes has rate set to 100% of pipe rate
+; - Within each traffic class, the byte-level WRR weights for the 4 queues
+; are set to 1:1:1:1
+;
+; For more details, please refer to chapter "Quality of Service (QoS) Framework"
+; of the Data Plane Development Kit (DPDK) Programmer's Guide.
+
+; Port configuration
+[port]
+frame overhead = 24
+number of subports per port = 1
+number of pipes per subport = 4096
+queue sizes = 64 64 64 64
+
+; Subport configuration
+[subport 0]
+tb rate = 1250000000 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 1250000000 ; Bytes per second
+tc 1 rate = 1250000000 ; Bytes per second
+tc 2 rate = 1250000000 ; Bytes per second
+tc 3 rate = 1250000000 ; Bytes per second
+tc period = 10 ; Milliseconds
+
+pipe 0-4095 = 0 ; These pipes are configured with pipe profile 0
+
+; Pipe configuration
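+; The pipe token bucket rate below is the port rate shared equally by the
+; 4096 pipes: 1250000000 / 4096 ~= 305175 bytes per second.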
+[pipe profile 0]
+tb rate = 305175 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 305175 ; Bytes per second
+tc 1 rate = 305175 ; Bytes per second
+tc 2 rate = 305175 ; Bytes per second
+tc 3 rate = 305175 ; Bytes per second
+tc period = 40 ; Milliseconds
+
+tc 3 oversubscription weight = 1
+
+tc 0 wrr weights = 1 1 1 1
+tc 1 wrr weights = 1 1 1 1
+tc 2 wrr weights = 1 1 1 1
+tc 3 wrr weights = 1 1 1 1
+
+; RED params per traffic class and color (Green / Yellow / Red)
+[red]
+tc 0 wred min = 48 40 32
+tc 0 wred max = 64 64 64
+tc 0 wred inv prob = 10 10 10
+tc 0 wred weight = 9 9 9
+
+tc 1 wred min = 48 40 32
+tc 1 wred max = 64 64 64
+tc 1 wred inv prob = 10 10 10
+tc 1 wred weight = 9 9 9
+
+tc 2 wred min = 48 40 32
+tc 2 wred max = 64 64 64
+tc 2 wred inv prob = 10 10 10
+tc 2 wred weight = 9 9 9
+
+tc 3 wred min = 48 40 32
+tc 3 wred max = 64 64 64
+tc 3 wred inv prob = 10 10 10
+tc 3 wred weight = 9 9 9
diff --git a/examples/qos_sched/profile_ov.cfg b/examples/qos_sched/profile_ov.cfg
new file mode 100644
index 00000000..33000df9
--- /dev/null
+++ b/examples/qos_sched/profile_ov.cfg
@@ -0,0 +1,90 @@
+; BSD LICENSE
+;
+; Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+; All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+; Port configuration
+[port]
+frame overhead = 24
+number of subports per port = 1
+number of pipes per subport = 32
+queue sizes = 64 64 64 64
+
+; Subport configuration
+[subport 0]
+tb rate = 8400000 ; Bytes per second
+tb size = 100000 ; Bytes
+
+tc 0 rate = 8400000 ; Bytes per second
+tc 1 rate = 8400000 ; Bytes per second
+tc 2 rate = 8400000 ; Bytes per second
+tc 3 rate = 8400000 ; Bytes per second
+tc period = 10 ; Milliseconds
+
+pipe 0-31 = 0 ; These pipes are configured with pipe profile 0
+
+; Pipe configuration
+[pipe profile 0]
+tb rate = 16800000 ; Bytes per second
+tb size = 1000000 ; Bytes
+
+tc 0 rate = 16800000 ; Bytes per second
+tc 1 rate = 16800000 ; Bytes per second
+tc 2 rate = 16800000 ; Bytes per second
+tc 3 rate = 16800000 ; Bytes per second
+tc period = 28 ; Milliseconds
+
+tc 3 oversubscription weight = 1
+
+tc 0 wrr weights = 1 1 1 1
+tc 1 wrr weights = 1 1 1 1
+tc 2 wrr weights = 1 1 1 1
+tc 3 wrr weights = 1 1 1 1
+
+; RED params per traffic class and color (Green / Yellow / Red)
+[red]
+tc 0 wred min = 48 40 32
+tc 0 wred max = 64 64 64
+tc 0 wred inv prob = 10 10 10
+tc 0 wred weight = 9 9 9
+
+tc 1 wred min = 48 40 32
+tc 1 wred max = 64 64 64
+tc 1 wred inv prob = 10 10 10
+tc 1 wred weight = 9 9 9
+
+tc 2 wred min = 48 40 32
+tc 2 wred max = 64 64 64
+tc 2 wred inv prob = 10 10 10
+tc 2 wred weight = 9 9 9
+
+tc 3 wred min = 48 40 32
+tc 3 wred max = 64 64 64
+tc 3 wred inv prob = 10 10 10
+tc 3 wred weight = 9 9 9
diff --git a/examples/qos_sched/stats.c b/examples/qos_sched/stats.c
new file mode 100644
index 00000000..5c894455
--- /dev/null
+++ b/examples/qos_sched/stats.c
@@ -0,0 +1,315 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <string.h>
+
+#include "main.h"
+
+int
+qavg_q(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc, uint8_t q)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint32_t queue_id, count, i;
+ uint32_t average;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport
+ || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE || q >= RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
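+ /* Flat queue index: each pipe owns RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS queues; skip the queues of all preceding pipes, then add the (tc, q) offset within the pipe. */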
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id);
+ queue_id = queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + q);
+
+ average = 0;
+
+ for (count = 0; count < qavg_ntimes; count++) {
+ rte_sched_queue_read_stats(port, queue_id, &stats, &qlen);
+ average += qlen;
+ usleep(qavg_period);
+ }
+
+ average /= qavg_ntimes;
+
+ printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average);
+
+ return 0;
+}
+
+int
+qavg_tcpipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id, uint8_t tc)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint32_t queue_id, count, i;
+ uint32_t average, part_average;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport
+ || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id);
+
+ average = 0;
+
+ for (count = 0; count < qavg_ntimes; count++) {
+ part_average = 0;
+ for (i = 0; i < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + i), &stats, &qlen);
+ part_average += qlen;
+ }
+ average += part_average / RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS;
+ usleep(qavg_period);
+ }
+
+ average /= qavg_ntimes;
+
+ printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average);
+
+ return 0;
+}
+
+int
+qavg_pipe(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint32_t queue_id, count, i;
+ uint32_t average, part_average;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id);
+
+ average = 0;
+
+ for (count = 0; count < qavg_ntimes; count++) {
+ part_average = 0;
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; i++) {
+ rte_sched_queue_read_stats(port, queue_id + i, &stats, &qlen);
+ part_average += qlen;
+ }
+ average += part_average / (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
+ usleep(qavg_period);
+ }
+
+ average /= qavg_ntimes;
+
+ printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average);
+
+ return 0;
+}
+
+int
+qavg_tcsubport(uint8_t port_id, uint32_t subport_id, uint8_t tc)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint32_t queue_id, count, i, j;
+ uint32_t average, part_average;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || tc >= RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
+ average = 0;
+
+ for (count = 0; count < qavg_ntimes; count++) {
+ part_average = 0;
+ for (i = 0; i < port_params.n_pipes_per_subport; i++) {
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i);
+
+ for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
+ rte_sched_queue_read_stats(port, queue_id + (tc * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
+ part_average += qlen;
+ }
+ }
+
+ average += part_average / (port_params.n_pipes_per_subport * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
+ usleep(qavg_period);
+ }
+
+ average /= qavg_ntimes;
+
+ printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average);
+
+ return 0;
+}
+
+int
+qavg_subport(uint8_t port_id, uint32_t subport_id)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint32_t queue_id, count, i, j;
+ uint32_t average, part_average;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
+ average = 0;
+
+ for (count = 0; count < qavg_ntimes; count++) {
+ part_average = 0;
+ for (i = 0; i < port_params.n_pipes_per_subport; i++) {
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + i);
+
+ for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
+ rte_sched_queue_read_stats(port, queue_id + j, &stats, &qlen);
+ part_average += qlen;
+ }
+ }
+
+ average += part_average / (port_params.n_pipes_per_subport * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS);
+ usleep(qavg_period);
+ }
+
+ average /= qavg_ntimes;
+
+ printf("\nAverage queue size: %" PRIu32 " bytes.\n\n", average);
+
+ return 0;
+}
+
+int
+subport_stat(uint8_t port_id, uint32_t subport_id)
+{
+ struct rte_sched_subport_stats stats;
+ struct rte_sched_port *port;
+ uint32_t tc_ov[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE];
+ uint8_t i;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+ memset (tc_ov, 0, sizeof(tc_ov));
+
+ rte_sched_subport_read_stats(port, subport_id, &stats, tc_ov);
+
+ printf("\n");
+ printf("+----+-------------+-------------+-------------+-------------+-------------+\n");
+ printf("| TC | Pkts OK |Pkts Dropped | Bytes OK |Bytes Dropped| OV Status |\n");
+ printf("+----+-------------+-------------+-------------+-------------+-------------+\n");
+
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ printf("| %d | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " |\n", i,
+ stats.n_pkts_tc[i], stats.n_pkts_tc_dropped[i],
+ stats.n_bytes_tc[i], stats.n_bytes_tc_dropped[i], tc_ov[i]);
+ printf("+----+-------------+-------------+-------------+-------------+-------------+\n");
+ }
+ printf("\n");
+
+ return 0;
+}
+
+int
+pipe_stat(uint8_t port_id, uint32_t subport_id, uint32_t pipe_id)
+{
+ struct rte_sched_queue_stats stats;
+ struct rte_sched_port *port;
+ uint16_t qlen;
+ uint8_t i, j;
+ uint32_t queue_id;
+
+ for (i = 0; i < nb_pfc; i++) {
+ if (qos_conf[i].tx_port == port_id)
+ break;
+ }
+ if (i == nb_pfc || subport_id >= port_params.n_subports_per_port || pipe_id >= port_params.n_pipes_per_subport)
+ return -1;
+
+ port = qos_conf[i].sched_port;
+
+ queue_id = RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS * (subport_id * port_params.n_pipes_per_subport + pipe_id);
+
+ printf("\n");
+ printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n");
+ printf("| TC | Queue | Pkts OK |Pkts Dropped | Bytes OK |Bytes Dropped| Length |\n");
+ printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n");
+
+ for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) {
+ for (j = 0; j < RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; j++) {
+
+ rte_sched_queue_read_stats(port, queue_id + (i * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + j), &stats, &qlen);
+
+ printf("| %d | %d | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11" PRIu32 " | %11i |\n", i, j,
+ stats.n_pkts, stats.n_pkts_dropped, stats.n_bytes, stats.n_bytes_dropped, qlen);
+ printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n");
+ }
+ if (i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE - 1)
+ printf("+----+-------+-------------+-------------+-------------+-------------+-------------+\n");
+ }
+ printf("\n");
+
+ return 0;
+}
diff --git a/examples/quota_watermark/Makefile b/examples/quota_watermark/Makefile
new file mode 100644
index 00000000..17fe473b
--- /dev/null
+++ b/examples/quota_watermark/Makefile
@@ -0,0 +1,44 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += qw
+DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += qwctl
+
+include $(RTE_SDK)/mk/rte.extsubdir.mk
diff --git a/examples/quota_watermark/include/conf.h b/examples/quota_watermark/include/conf.h
new file mode 100644
index 00000000..bdc3bbed
--- /dev/null
+++ b/examples/quota_watermark/include/conf.h
@@ -0,0 +1,48 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _CONF_H_
+#define _CONF_H_
+
+#define RING_SIZE 1024
+#define MAX_PKT_QUOTA 64
+
+#define RX_DESC_PER_QUEUE 128
+#define TX_DESC_PER_QUEUE 512
+
+#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
+#define MBUF_PER_POOL 8192
+
+#define QUOTA_WATERMARK_MEMZONE_NAME "qw_global_vars"
+
+#endif /* _CONF_H_ */
diff --git a/examples/quota_watermark/qw/Makefile b/examples/quota_watermark/qw/Makefile
new file mode 100644
index 00000000..fac9328d
--- /dev/null
+++ b/examples/quota_watermark/qw/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = qw
+
+# all source are stored in SRCS-y
+SRCS-y := args.c init.c main.c
+
+CFLAGS += -O3 -DQW_SOFTWARE_FC
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/quota_watermark/qw/args.c b/examples/quota_watermark/qw/args.c
new file mode 100644
index 00000000..408b54d1
--- /dev/null
+++ b/examples/quota_watermark/qw/args.c
@@ -0,0 +1,104 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <rte_common.h>
+#include <rte_lcore.h>
+
+#include "args.h"
+
+
+unsigned int portmask = 0;
+
+
+static void
+usage(const char *prgname)
+{
+ fprintf(stderr, "Usage: %s [EAL args] -- -p <portmask>\n"
+ "-p PORTMASK: hexadecimal bitmask of NIC ports to configure\n",
+ prgname);
+}
+
+static unsigned long
+parse_portmask(const char *portmask_str)
+{
+ return strtoul(portmask_str, NULL, 16);
+}
+
+static void
+check_core_count(void)
+{
+ if (rte_lcore_count() < 3)
+ rte_exit(EXIT_FAILURE, "At least 3 cores need to be passed in the coremask\n");
+}
+
+static void
+check_portmask_value(unsigned int portmask)
+{
+ unsigned int port_nb = 0;
+
+ port_nb = __builtin_popcount(portmask);
+
+ if (port_nb == 0)
+ rte_exit(EXIT_FAILURE, "At least 2 ports need to be passed in the portmask\n");
+
+ if (port_nb % 2 != 0)
+ rte_exit(EXIT_FAILURE, "An even number of ports is required in the portmask\n");
+}
+
+int
+parse_qw_args(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hp:")) != -1) {
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'p':
+ portmask = parse_portmask(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ check_core_count();
+ check_portmask_value(portmask);
+
+ return 0;
+}
diff --git a/examples/quota_watermark/qw/args.h b/examples/quota_watermark/qw/args.h
new file mode 100644
index 00000000..d6d4fb62
--- /dev/null
+++ b/examples/quota_watermark/qw/args.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _ARGS_H_
+#define _ARGS_H_
+
+extern unsigned int portmask;
+
+int parse_qw_args(int argc, char **argv);
+
+#endif /* _ARGS_H_ */
diff --git a/examples/quota_watermark/qw/init.c b/examples/quota_watermark/qw/init.c
new file mode 100644
index 00000000..afc13665
--- /dev/null
+++ b/examples/quota_watermark/qw/init.c
@@ -0,0 +1,174 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+
+#include <rte_common.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_memzone.h>
+#include <rte_ring.h>
+#include <rte_string_fns.h>
+
+#include "args.h"
+#include "init.h"
+#include "main.h"
+#include "../include/conf.h"
+
+
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_DCB_NONE,
+ },
+};
+
+static struct rte_eth_fc_conf fc_conf = {
+ .mode = RTE_FC_TX_PAUSE,
+ .high_water = 80 * 510 / 100,
+ .low_water = 60 * 510 / 100,
+ .pause_time = 1337,
+ .send_xon = 0,
+};
+
+
+void configure_eth_port(uint8_t port_id)
+{
+ int ret;
+
+ rte_eth_dev_stop(port_id);
+
+ ret = rte_eth_dev_configure(port_id, 1, 1, &port_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot configure port %u (error %d)\n",
+ (unsigned) port_id, ret);
+
+ /* Initialize the port's RX queue */
+ ret = rte_eth_rx_queue_setup(port_id, 0, RX_DESC_PER_QUEUE,
+ rte_eth_dev_socket_id(port_id),
+ NULL,
+ mbuf_pool);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Failed to setup RX queue on "
+ "port %u (error %d)\n", (unsigned) port_id, ret);
+
+ /* Initialize the port's TX queue */
+ ret = rte_eth_tx_queue_setup(port_id, 0, TX_DESC_PER_QUEUE,
+ rte_eth_dev_socket_id(port_id),
+ NULL);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Failed to setup TX queue on "
+ "port %u (error %d)\n", (unsigned) port_id, ret);
+
+ /* Initialize the port's flow control */
+ ret = rte_eth_dev_flow_ctrl_set(port_id, &fc_conf);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Failed to setup hardware flow control on "
+ "port %u (error %d)\n", (unsigned) port_id, ret);
+
+ /* Start the port */
+ ret = rte_eth_dev_start(port_id);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Failed to start port %u (error %d)\n",
+ (unsigned) port_id, ret);
+
+ /* Put it in promiscuous mode */
+ rte_eth_promiscuous_enable(port_id);
+}
+
+void
+init_dpdk(void)
+{
+ if (rte_eth_dev_count() < 2)
+ rte_exit(EXIT_FAILURE, "Not enough ethernet port available\n");
+}
+
+void init_ring(int lcore_id, uint8_t port_id)
+{
+ struct rte_ring *ring;
+ char ring_name[RTE_RING_NAMESIZE];
+
+ snprintf(ring_name, RTE_RING_NAMESIZE,
+ "core%d_port%d", lcore_id, port_id);
+ ring = rte_ring_create(ring_name, RING_SIZE, rte_socket_id(),
+ RING_F_SP_ENQ | RING_F_SC_DEQ);
+
+ if (ring == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+
+ rte_ring_set_water_mark(ring, 80 * RING_SIZE / 100);
+
+ rings[lcore_id][port_id] = ring;
+}
+
+void
+pair_ports(void)
+{
+ uint8_t i, j;
+
+ /* Pair ports with their "closest neighbour" in the portmask */
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++)
+ if (is_bit_set(i, portmask))
+ for (j = (uint8_t) (i + 1); j < RTE_MAX_ETHPORTS; j++)
+ if (is_bit_set(j, portmask)) {
+ port_pairs[i] = j;
+ port_pairs[j] = i;
+ i = j;
+ break;
+ }
+}
+
+void
+setup_shared_variables(void)
+{
+ const struct rte_memzone *qw_memzone;
+
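+ /* The named memzone lets the qwctl control application, running as a secondary process, find and update the shared quota and low_watermark variables. */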
+ qw_memzone = rte_memzone_reserve(QUOTA_WATERMARK_MEMZONE_NAME, 2 * sizeof(int),
+ rte_socket_id(), RTE_MEMZONE_2MB);
+ if (qw_memzone == NULL)
+ rte_exit(EXIT_FAILURE, "%s\n", rte_strerror(rte_errno));
+
+ quota = qw_memzone->addr;
+ low_watermark = (unsigned int *) qw_memzone->addr + sizeof(int);
+}
diff --git a/examples/quota_watermark/qw/init.h b/examples/quota_watermark/qw/init.h
new file mode 100644
index 00000000..6d0af3ab
--- /dev/null
+++ b/examples/quota_watermark/qw/init.h
@@ -0,0 +1,43 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _INIT_H_
+#define _INIT_H_
+
+void configure_eth_port(uint8_t port_id);
+void init_dpdk(void);
+void init_ring(int lcore_id, uint8_t port_id);
+void pair_ports(void);
+void setup_shared_variables(void);
+
+#endif /* _INIT_H_ */
diff --git a/examples/quota_watermark/qw/main.c b/examples/quota_watermark/qw/main.c
new file mode 100644
index 00000000..8ed02148
--- /dev/null
+++ b/examples/quota_watermark/qw/main.c
@@ -0,0 +1,372 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <rte_eal.h>
+
+#include <rte_common.h>
+#include <rte_debug.h>
+#include <rte_errno.h>
+#include <rte_ethdev.h>
+#include <rte_launch.h>
+#include <rte_lcore.h>
+#include <rte_log.h>
+#include <rte_mbuf.h>
+#include <rte_ring.h>
+
+#include <rte_byteorder.h>
+
+#include "args.h"
+#include "main.h"
+#include "init.h"
+#include "../include/conf.h"
+
+
+#ifdef QW_SOFTWARE_FC
+#define SEND_PAUSE_FRAME(port_id, duration) send_pause_frame(port_id, duration)
+#else
+#define SEND_PAUSE_FRAME(port_id, duration) do { } while(0)
+#endif
+
+#define ETHER_TYPE_FLOW_CONTROL 0x8808
+
+struct ether_fc_frame {
+ uint16_t opcode;
+ uint16_t param;
+} __attribute__((__packed__));
+
+
+int *quota;
+unsigned int *low_watermark;
+
+uint8_t port_pairs[RTE_MAX_ETHPORTS];
+
+struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS];
+struct rte_mempool *mbuf_pool;
+
+
+static void send_pause_frame(uint8_t port_id, uint16_t duration)
+{
+ struct rte_mbuf *mbuf;
+ struct ether_fc_frame *pause_frame;
+ struct ether_hdr *hdr;
+ struct ether_addr mac_addr;
+
+ RTE_LOG(DEBUG, USER1, "Sending PAUSE frame (duration=%d) on port %d\n",
+ duration, port_id);
+
+ /* Get a mbuf from the pool */
+ mbuf = rte_pktmbuf_alloc(mbuf_pool);
+ if (unlikely(mbuf == NULL))
+ return;
+
+ /* Prepare a PAUSE frame */
+ hdr = rte_pktmbuf_mtod(mbuf, struct ether_hdr *);
+ pause_frame = (struct ether_fc_frame *) &hdr[1];
+
+ rte_eth_macaddr_get(port_id, &mac_addr);
+ ether_addr_copy(&mac_addr, &hdr->s_addr);
+
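+ /* Destination is the IEEE 802.3x flow-control multicast address 01:80:C2:00:00:01; on a little-endian host the 64-bit constant below lays out those six bytes in wire order. */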
+ void *tmp = &hdr->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x010000C28001ULL;
+
+ hdr->ether_type = rte_cpu_to_be_16(ETHER_TYPE_FLOW_CONTROL);
+
+ pause_frame->opcode = rte_cpu_to_be_16(0x0001);
+ pause_frame->param = rte_cpu_to_be_16(duration);
+
+ mbuf->pkt_len = 60;
+ mbuf->data_len = 60;
+
+ rte_eth_tx_burst(port_id, 0, &mbuf, 1);
+}
+
+/**
+ * Get the previous enabled lcore ID
+ *
+ * @param lcore_id
+ * The current lcore ID.
+ * @return
+ * The previous enabled lcore_id or -1 if not found.
+ */
+static unsigned int
+get_previous_lcore_id(unsigned int lcore_id)
+{
+ int i;
+
+ for (i = lcore_id - 1; i >= 0; i--)
+ if (rte_lcore_is_enabled(i))
+ return i;
+
+ return -1;
+}
+
+/**
+ * Get the last enabled lcore ID
+ *
+ * @return
+ * The last enabled lcore_id.
+ */
+static unsigned int
+get_last_lcore_id(void)
+{
+ int i;
+
+ for (i = RTE_MAX_LCORE; i >= 0; i--)
+ if (rte_lcore_is_enabled(i))
+ return i;
+
+ return 0;
+}
+
+static void
+receive_stage(__attribute__((unused)) void *args)
+{
+ int i, ret;
+
+ uint8_t port_id;
+ uint16_t nb_rx_pkts;
+
+ unsigned int lcore_id;
+
+ struct rte_mbuf *pkts[MAX_PKT_QUOTA];
+ struct rte_ring *ring;
+ enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY };
+
+ lcore_id = rte_lcore_id();
+
+ RTE_LOG(INFO, USER1,
+ "%s() started on core %u\n", __func__, lcore_id);
+
+ while (1) {
+
+ /* Process each port round robin style */
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+
+ if (!is_bit_set(port_id, portmask))
+ continue;
+
+ ring = rings[lcore_id][port_id];
+
+ if (ring_state[port_id] != RING_READY) {
+ if (rte_ring_count(ring) > *low_watermark)
+ continue;
+ else
+ ring_state[port_id] = RING_READY;
+ }
+
+ /* Enqueue received packets on the RX ring */
+ nb_rx_pkts = rte_eth_rx_burst(port_id, 0, pkts, (uint16_t) *quota);
+ ret = rte_ring_enqueue_bulk(ring, (void *) pkts, nb_rx_pkts);
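+ /* -EDQUOT: packets were enqueued but the ring watermark is now exceeded; -ENOBUFS: the ring is full and nothing was enqueued. */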
+ if (ret == -EDQUOT) {
+ ring_state[port_id] = RING_OVERLOADED;
+ send_pause_frame(port_id, 1337);
+ }
+
+ else if (ret == -ENOBUFS) {
+
+ /* Return mbufs to the pool, effectively dropping packets */
+ for (i = 0; i < nb_rx_pkts; i++)
+ rte_pktmbuf_free(pkts[i]);
+ }
+ }
+ }
+}
+
+static void
+pipeline_stage(__attribute__((unused)) void *args)
+{
+ int i, ret;
+ int nb_dq_pkts;
+
+ uint8_t port_id;
+
+ unsigned int lcore_id, previous_lcore_id;
+
+ void *pkts[MAX_PKT_QUOTA];
+ struct rte_ring *rx, *tx;
+ enum ring_state ring_state[RTE_MAX_ETHPORTS] = { RING_READY };
+
+ lcore_id = rte_lcore_id();
+ previous_lcore_id = get_previous_lcore_id(lcore_id);
+
+ RTE_LOG(INFO, USER1,
+ "%s() started on core %u - processing packets from core %u\n",
+ __func__, lcore_id, previous_lcore_id);
+
+ while (1) {
+
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+
+ if (!is_bit_set(port_id, portmask))
+ continue;
+
+ tx = rings[lcore_id][port_id];
+ rx = rings[previous_lcore_id][port_id];
+
+ if (ring_state[port_id] != RING_READY) {
+ if (rte_ring_count(tx) > *low_watermark)
+ continue;
+ else
+ ring_state[port_id] = RING_READY;
+ }
+
+ /* Dequeue up to quota mbuf from rx */
+ nb_dq_pkts = rte_ring_dequeue_burst(rx, pkts, *quota);
+ if (unlikely(nb_dq_pkts < 0))
+ continue;
+
+ /* Enqueue them on tx */
+ ret = rte_ring_enqueue_bulk(tx, pkts, nb_dq_pkts);
+ if (ret == -EDQUOT)
+ ring_state[port_id] = RING_OVERLOADED;
+
+ else if (ret == -ENOBUFS) {
+
+ /* Return mbufs to the pool, effectively dropping packets */
+ for (i = 0; i < nb_dq_pkts; i++)
+ rte_pktmbuf_free(pkts[i]);
+ }
+ }
+ }
+}
+
+static void
+send_stage(__attribute__((unused)) void *args)
+{
+ uint16_t nb_dq_pkts;
+
+ uint8_t port_id;
+ uint8_t dest_port_id;
+
+ unsigned int lcore_id, previous_lcore_id;
+
+ struct rte_ring *tx;
+ struct rte_mbuf *tx_pkts[MAX_PKT_QUOTA];
+
+ lcore_id = rte_lcore_id();
+ previous_lcore_id = get_previous_lcore_id(lcore_id);
+
+ RTE_LOG(INFO, USER1,
+ "%s() started on core %u - processing packets from core %u\n",
+ __func__, lcore_id, previous_lcore_id);
+
+ while (1) {
+
+ /* Process each ring round robin style */
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++) {
+
+ if (!is_bit_set(port_id, portmask))
+ continue;
+
+ dest_port_id = port_pairs[port_id];
+ tx = rings[previous_lcore_id][port_id];
+
+ if (rte_ring_empty(tx))
+ continue;
+
+ /* Dequeue packets from tx and send them */
+ nb_dq_pkts = (uint16_t) rte_ring_dequeue_burst(tx, (void *) tx_pkts, *quota);
+ rte_eth_tx_burst(dest_port_id, 0, tx_pkts, nb_dq_pkts);
+
+ /* TODO: Check if nb_dq_pkts == nb_tx_pkts? */
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned int lcore_id, master_lcore_id, last_lcore_id;
+
+ uint8_t port_id;
+
+ rte_set_log_level(RTE_LOG_INFO);
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n");
+
+ argc -= ret;
+ argv += ret;
+
+ init_dpdk();
+ setup_shared_variables();
+
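+ /* Default runtime values: 32-packet quota, low watermark at 60% of RING_SIZE */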
+ *quota = 32;
+ *low_watermark = 60 * RING_SIZE / 100;
+
+ last_lcore_id = get_last_lcore_id();
+ master_lcore_id = rte_get_master_lcore();
+
+ /* Parse the application's arguments */
+ ret = parse_qw_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid quota/watermark argument(s)\n");
+
+ /* Create a pool of mbuf to store packets */
+ mbuf_pool = rte_pktmbuf_pool_create("mbuf_pool", MBUF_PER_POOL, 32, 0,
+ MBUF_DATA_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_panic("%s\n", rte_strerror(rte_errno));
+
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++)
+ if (is_bit_set(port_id, portmask)) {
+ configure_eth_port(port_id);
+ init_ring(master_lcore_id, port_id);
+ }
+
+ pair_ports();
+
+ /* Start pipeline_stage() on all the available slave lcores but the last */
+ for (lcore_id = 0 ; lcore_id < last_lcore_id; lcore_id++) {
+ if (rte_lcore_is_enabled(lcore_id) && lcore_id != master_lcore_id) {
+
+ for (port_id = 0; port_id < RTE_MAX_ETHPORTS; port_id++)
+ if (is_bit_set(port_id, portmask))
+ init_ring(lcore_id, port_id);
+
+ /* typecast is a workaround for GCC 4.3 bug */
+ rte_eal_remote_launch((int (*)(void *))pipeline_stage, NULL, lcore_id);
+ }
+ }
+
+ /* Start send_stage() on the last slave core */
+ /* typecast is a workaround for GCC 4.3 bug */
+ rte_eal_remote_launch((int (*)(void *))send_stage, NULL, last_lcore_id);
+
+ /* Start receive_stage() on the master core */
+ receive_stage(NULL);
+
+ return 0;
+}
diff --git a/examples/quota_watermark/qw/main.h b/examples/quota_watermark/qw/main.h
new file mode 100644
index 00000000..6b364898
--- /dev/null
+++ b/examples/quota_watermark/qw/main.h
@@ -0,0 +1,59 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+#include "../include/conf.h"
+
+enum ring_state {
+ RING_READY,
+ RING_OVERLOADED,
+};
+
+extern int *quota;
+extern unsigned int *low_watermark;
+
+extern uint8_t port_pairs[RTE_MAX_ETHPORTS];
+
+extern struct rte_ring *rings[RTE_MAX_LCORE][RTE_MAX_ETHPORTS];
+extern struct rte_mempool *mbuf_pool;
+
+
+static inline int
+is_bit_set(int i, unsigned int mask)
+{
+ return (1 << i) & mask;
+}
+
+#endif /* _MAIN_H_ */
diff --git a/examples/quota_watermark/qwctl/Makefile b/examples/quota_watermark/qwctl/Makefile
new file mode 100644
index 00000000..1ca2f1e9
--- /dev/null
+++ b/examples/quota_watermark/qwctl/Makefile
@@ -0,0 +1,50 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = qwctl
+
+# all source are stored in SRCS-y
+SRCS-y := commands.c qwctl.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/quota_watermark/qwctl/commands.c b/examples/quota_watermark/qwctl/commands.c
new file mode 100644
index 00000000..5348dd3d
--- /dev/null
+++ b/examples/quota_watermark/qwctl/commands.c
@@ -0,0 +1,217 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+#include <termios.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_parse_string.h>
+#include <cmdline.h>
+
+#include <rte_ring.h>
+
+#include "qwctl.h"
+#include "../include/conf.h"
+
+
+/**
+ * help command
+ */
+
+struct cmd_help_tokens {
+ cmdline_fixed_string_t verb;
+};
+
+cmdline_parse_token_string_t cmd_help_verb =
+ TOKEN_STRING_INITIALIZER(struct cmd_help_tokens, verb, "help");
+
+static void
+cmd_help_handler(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ cmdline_printf(cl, "Available commands:\n"
+ "- help\n"
+ "- set [ring_name|variable] <value>\n"
+ "- show [ring_name|variable]\n"
+ "\n"
+ "Available variables:\n"
+ "- low_watermark\n"
+ "- quota\n"
+ "- ring names follow the core%%u_port%%u format\n");
+}
+
+cmdline_parse_inst_t cmd_help = {
+ .f = cmd_help_handler,
+ .data = NULL,
+ .help_str = "show help",
+ .tokens = {
+ (void *) &cmd_help_verb,
+ NULL,
+ },
+};
+
+
+/**
+ * set command
+ */
+
+struct cmd_set_tokens {
+ cmdline_fixed_string_t verb;
+ cmdline_fixed_string_t variable;
+ uint32_t value;
+};
+
+cmdline_parse_token_string_t cmd_set_verb =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_tokens, verb, "set");
+
+cmdline_parse_token_string_t cmd_set_variable =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_tokens, variable, NULL);
+
+cmdline_parse_token_num_t cmd_set_value =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_tokens, value, UINT32);
+
+static void
+cmd_set_handler(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_set_tokens *tokens = parsed_result;
+ struct rte_ring *ring;
+
+ if (!strcmp(tokens->variable, "quota")) {
+
+ if (tokens->value > 0 && tokens->value <= MAX_PKT_QUOTA)
+ *quota = tokens->value;
+ else
+ cmdline_printf(cl, "quota must be between 1 and %u\n", MAX_PKT_QUOTA);
+ }
+
+ else if (!strcmp(tokens->variable, "low_watermark")) {
+
+ if (tokens->value <= 100)
+ *low_watermark = tokens->value * RING_SIZE / 100;
+ else
+ cmdline_printf(cl, "low_watermark must be between 0%% and 100%%\n");
+ }
+
+ else {
+
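+ /*
+ * Otherwise the variable is a ring name: set that ring's high
+ * watermark, given as a percentage of RING_SIZE, which may not be
+ * lower than the global low watermark.
+ */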
+ ring = rte_ring_lookup(tokens->variable);
+ if (ring == NULL)
+ cmdline_printf(cl, "Cannot find ring \"%s\"\n", tokens->variable);
+ else
+ if (tokens->value >= *low_watermark * 100 / RING_SIZE
+ && tokens->value <= 100)
+ rte_ring_set_water_mark(ring, tokens->value * RING_SIZE / 100);
+ else
+ cmdline_printf(cl, "ring high watermark must be between %u%% "
+ "and 100%%\n", *low_watermark * 100 / RING_SIZE);
+ }
+}
+
+cmdline_parse_inst_t cmd_set = {
+ .f = cmd_set_handler,
+ .data = NULL,
+ .help_str = "Set a variable value",
+ .tokens = {
+ (void *) &cmd_set_verb,
+ (void *) &cmd_set_variable,
+ (void *) &cmd_set_value,
+ NULL,
+ },
+};
+
+
+/**
+ * show command
+ */
+
+struct cmd_show_tokens {
+ cmdline_fixed_string_t verb;
+ cmdline_fixed_string_t variable;
+};
+
+cmdline_parse_token_string_t cmd_show_verb =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_tokens, verb, "show");
+
+cmdline_parse_token_string_t cmd_show_variable =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_tokens, variable, NULL);
+
+
+static void
+cmd_show_handler(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_show_tokens *tokens = parsed_result;
+ struct rte_ring *ring;
+
+ if (!strcmp(tokens->variable, "quota"))
+ cmdline_printf(cl, "Global quota: %d\n", *quota);
+
+ else if (!strcmp(tokens->variable, "low_watermark"))
+ cmdline_printf(cl, "Global low_watermark: %u\n", *low_watermark);
+
+ else {
+
+ ring = rte_ring_lookup(tokens->variable);
+ if (ring == NULL)
+ cmdline_printf(cl, "Cannot find ring \"%s\"\n", tokens->variable);
+ else
+ rte_ring_dump(stdout, ring);
+ }
+}
+
+cmdline_parse_inst_t cmd_show = {
+ .f = cmd_show_handler,
+ .data = NULL,
+ .help_str = "Show a variable value",
+ .tokens = {
+ (void *) &cmd_show_verb,
+ (void *) &cmd_show_variable,
+ NULL,
+ },
+};
+
+
+cmdline_parse_ctx_t qwctl_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_help,
+ (cmdline_parse_inst_t *)&cmd_set,
+ (cmdline_parse_inst_t *)&cmd_show,
+ NULL,
+};
diff --git a/examples/quota_watermark/qwctl/commands.h b/examples/quota_watermark/qwctl/commands.h
new file mode 100644
index 00000000..c010941d
--- /dev/null
+++ b/examples/quota_watermark/qwctl/commands.h
@@ -0,0 +1,41 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _COMMANDS_H_
+#define _COMMANDS_H_
+
+#include <cmdline_parse.h>
+
+extern cmdline_parse_ctx_t qwctl_ctx[];
+
+#endif /* _COMMANDS_H_ */
diff --git a/examples/quota_watermark/qwctl/qwctl.c b/examples/quota_watermark/qwctl/qwctl.c
new file mode 100644
index 00000000..eb2f618a
--- /dev/null
+++ b/examples/quota_watermark/qwctl/qwctl.c
@@ -0,0 +1,95 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <termios.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include <rte_eal.h>
+
+#include <rte_log.h>
+#include <rte_memzone.h>
+#include <rte_ring.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+
+#include "qwctl.h"
+#include "commands.h"
+#include "../include/conf.h"
+
+
+int *quota;
+unsigned int *low_watermark;
+
+
+static void
+setup_shared_variables(void)
+{
+ const struct rte_memzone *qw_memzone;
+
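+ /*
+ * The qw application exposes the quota and low_watermark variables
+ * through a named memzone; look it up so qwctl manipulates the same
+ * shared values.
+ */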
+ qw_memzone = rte_memzone_lookup(QUOTA_WATERMARK_MEMZONE_NAME);
+ if (qw_memzone == NULL)
+ rte_exit(EXIT_FAILURE, "Couldn't find memzone\n");
+
+ quota = qw_memzone->addr;
+ low_watermark = (unsigned int *) qw_memzone->addr + sizeof(int);
+}
+
+int main(int argc, char **argv)
+{
+ int ret;
+ struct cmdline *cl;
+
+ rte_set_log_level(RTE_LOG_INFO);
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize EAL\n");
+
+ setup_shared_variables();
+
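+ /* Run an interactive command prompt on stdin using the qwctl commands. */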
+ cl = cmdline_stdin_new(qwctl_ctx, "qwctl> ");
+ if (cl == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create cmdline instance\n");
+
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+
+ return 0;
+}
diff --git a/examples/quota_watermark/qwctl/qwctl.h b/examples/quota_watermark/qwctl/qwctl.h
new file mode 100644
index 00000000..8d146e57
--- /dev/null
+++ b/examples/quota_watermark/qwctl/qwctl.h
@@ -0,0 +1,40 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+extern int *quota;
+extern unsigned int *low_watermark;
+
+#endif /* _MAIN_H_ */
diff --git a/examples/rxtx_callbacks/Makefile b/examples/rxtx_callbacks/Makefile
new file mode 100644
index 00000000..0fafbb72
--- /dev/null
+++ b/examples/rxtx_callbacks/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = rxtx_callbacks
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/rxtx_callbacks/main.c b/examples/rxtx_callbacks/main.c
new file mode 100644
index 00000000..048b23f5
--- /dev/null
+++ b/examples/rxtx_callbacks/main.c
@@ -0,0 +1,225 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN, },
+};
+
+static unsigned nb_ports;
+
+static struct {
+ uint64_t total_cycles;
+ uint64_t total_pkts;
+} latency_numbers;
+
+
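+/*
+ * RX callback: stamp each received mbuf with the current TSC value so the
+ * TX callback can later compute how long the packet spent in the application.
+ */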
+static uint16_t
+add_timestamps(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts,
+ uint16_t max_pkts __rte_unused, void *_ __rte_unused)
+{
+ unsigned i;
+ uint64_t now = rte_rdtsc();
+
+ for (i = 0; i < nb_pkts; i++)
+ pkts[i]->udata64 = now;
+ return nb_pkts;
+}
+
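+/*
+ * TX callback: accumulate the cycles elapsed since each packet was
+ * timestamped on RX and periodically print the average latency.
+ */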
+static uint16_t
+calc_latency(uint8_t port __rte_unused, uint16_t qidx __rte_unused,
+ struct rte_mbuf **pkts, uint16_t nb_pkts, void *_ __rte_unused)
+{
+ uint64_t cycles = 0;
+ uint64_t now = rte_rdtsc();
+ unsigned i;
+
+ for (i = 0; i < nb_pkts; i++)
+ cycles += now - pkts[i]->udata64;
+ latency_numbers.total_cycles += cycles;
+ latency_numbers.total_pkts += nb_pkts;
+
+ if (latency_numbers.total_pkts > (100 * 1000 * 1000ULL)) {
+ printf("Latency = %"PRIu64" cycles\n",
+ latency_numbers.total_cycles / latency_numbers.total_pkts);
+ latency_numbers.total_cycles = latency_numbers.total_pkts = 0;
+ }
+ return nb_pkts;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ struct ether_addr addr;
+
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ rte_eth_promiscuous_enable(port);
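+ /* Register the timestamping and latency callbacks on queue 0 of this port. */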
+ rte_eth_add_rx_callback(port, 0, add_timestamps, NULL);
+ rte_eth_add_tx_callback(port, 0, calc_latency, NULL);
+
+ return 0;
+}
+
+/*
+ * Main thread that does the work, reading from an input port
+ * and writing to the paired output port
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ uint8_t port;
+
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+ for (;;) {
+ for (port = 0; port < nb_ports; port++) {
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+ if (unlikely(nb_rx == 0))
+ continue;
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ uint8_t portid;
+
+ /* init EAL */
+ int ret = rte_eal_init(argc, argv);
+
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS * nb_ports, MBUF_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8"\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too much enabled lcores - "
+ "App uses only 1 lcore\n");
+
+ /* call lcore_main on master core only */
+ lcore_main();
+ return 0;
+}
diff --git a/examples/skeleton/Makefile b/examples/skeleton/Makefile
new file mode 100644
index 00000000..4a5d99f1
--- /dev/null
+++ b/examples/skeleton/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = basicfwd
+
+# all source are stored in SRCS-y
+SRCS-y := basicfwd.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+EXTRA_CFLAGS += -O3 -g -Wfatal-errors
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/skeleton/basicfwd.c b/examples/skeleton/basicfwd.c
new file mode 100644
index 00000000..c89822cb
--- /dev/null
+++ b/examples/skeleton/basicfwd.c
@@ -0,0 +1,211 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <inttypes.h>
+#include <rte_eal.h>
+#include <rte_ethdev.h>
+#include <rte_cycles.h>
+#include <rte_lcore.h>
+#include <rte_mbuf.h>
+
+#define RX_RING_SIZE 128
+#define TX_RING_SIZE 512
+
+#define NUM_MBUFS 8191
+#define MBUF_CACHE_SIZE 250
+#define BURST_SIZE 32
+
+static const struct rte_eth_conf port_conf_default = {
+ .rxmode = { .max_rx_pkt_len = ETHER_MAX_LEN }
+};
+
+/* basicfwd.c: Basic DPDK skeleton forwarding example. */
+
+/*
+ * Initializes a given port using global settings and with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_conf port_conf = port_conf_default;
+ const uint16_t rx_rings = 1, tx_rings = 1;
+ int retval;
+ uint16_t q;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ /* Configure the Ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Allocate and set up 1 RX queue per Ethernet port. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, RX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL, mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Allocate and set up 1 TX queue per Ethernet port. */
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, TX_RING_SIZE,
+ rte_eth_dev_socket_id(port), NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the Ethernet port. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Display the port MAC address. */
+ struct ether_addr addr;
+ rte_eth_macaddr_get(port, &addr);
+ printf("Port %u MAC: %02" PRIx8 " %02" PRIx8 " %02" PRIx8
+ " %02" PRIx8 " %02" PRIx8 " %02" PRIx8 "\n",
+ (unsigned)port,
+ addr.addr_bytes[0], addr.addr_bytes[1],
+ addr.addr_bytes[2], addr.addr_bytes[3],
+ addr.addr_bytes[4], addr.addr_bytes[5]);
+
+ /* Enable RX in promiscuous mode for the Ethernet device. */
+ rte_eth_promiscuous_enable(port);
+
+ return 0;
+}
+
+/*
+ * The lcore main. This is the main thread that does the work, reading from
+ * an input port and writing to an output port.
+ */
+static __attribute__((noreturn)) void
+lcore_main(void)
+{
+ const uint8_t nb_ports = rte_eth_dev_count();
+ uint8_t port;
+
+ /*
+ * Check that the port is on the same NUMA node as the polling thread
+ * for best performance.
+ */
+ for (port = 0; port < nb_ports; port++)
+ if (rte_eth_dev_socket_id(port) > 0 &&
+ rte_eth_dev_socket_id(port) !=
+ (int)rte_socket_id())
+ printf("WARNING, port %u is on remote NUMA node to "
+ "polling thread.\n\tPerformance will "
+ "not be optimal.\n", port);
+
+ printf("\nCore %u forwarding packets. [Ctrl+C to quit]\n",
+ rte_lcore_id());
+
+ /* Run until the application is quit or killed. */
+ for (;;) {
+ /*
+ * Receive packets on a port and forward them on the paired
+ * port. The mapping is 0 -> 1, 1 -> 0, 2 -> 3, 3 -> 2, etc.
+ */
+ for (port = 0; port < nb_ports; port++) {
+
+ /* Get burst of RX packets, from first port of pair. */
+ struct rte_mbuf *bufs[BURST_SIZE];
+ const uint16_t nb_rx = rte_eth_rx_burst(port, 0,
+ bufs, BURST_SIZE);
+
+ if (unlikely(nb_rx == 0))
+ continue;
+
+ /* Send burst of TX packets, to second port of pair. */
+ const uint16_t nb_tx = rte_eth_tx_burst(port ^ 1, 0,
+ bufs, nb_rx);
+
+ /* Free any unsent packets. */
+ if (unlikely(nb_tx < nb_rx)) {
+ uint16_t buf;
+ for (buf = nb_tx; buf < nb_rx; buf++)
+ rte_pktmbuf_free(bufs[buf]);
+ }
+ }
+ }
+}
+
+/*
+ * The main function, which does initialization and calls the per-lcore
+ * functions.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ unsigned nb_ports;
+ uint8_t portid;
+
+ /* Initialize the Environment Abstraction Layer (EAL). */
+ int ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+
+ argc -= ret;
+ argv += ret;
+
+ /* Check that there is an even number of ports to send/receive on. */
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports < 2 || (nb_ports & 1))
+ rte_exit(EXIT_FAILURE, "Error: number of ports must be even\n");
+
+ /* Creates a new mempool in memory to hold the mbufs. */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL", NUM_MBUFS * nb_ports,
+ MBUF_CACHE_SIZE, 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* Initialize all ports. */
+ for (portid = 0; portid < nb_ports; portid++)
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot init port %"PRIu8 "\n",
+ portid);
+
+ if (rte_lcore_count() > 1)
+ printf("\nWARNING: Too many lcores enabled. Only 1 used.\n");
+
+ /* Call lcore_main on the master core only. */
+ lcore_main();
+
+ return 0;
+}
diff --git a/examples/tep_termination/Makefile b/examples/tep_termination/Makefile
new file mode 100644
index 00000000..448e6183
--- /dev/null
+++ b/examples/tep_termination/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(error This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+endif
+
+# binary name
+APP = tep_termination
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vxlan_setup.c vxlan.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/tep_termination/main.c b/examples/tep_termination/main.c
new file mode 100644
index 00000000..f97d552a
--- /dev/null
+++ b/examples/tep_termination/main.c
@@ -0,0 +1,1275 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arpa/inet.h>
+#include <getopt.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/eventfd.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+
+#include "main.h"
+#include "vxlan.h"
+#include "vxlan_setup.h"
+
+/* the maximum number of external ports supported */
+#define MAX_SUP_PORTS 1
+
+/**
+ * Calculate the number of buffers needed per port
+ */
+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES * RTE_TEST_RX_DESC_DEFAULT) +\
+ (nb_switching_cores * MAX_PKT_BURST) +\
+ (nb_switching_cores * \
+ RTE_TEST_TX_DESC_DEFAULT) +\
+ (nb_switching_cores * MBUF_CACHE_SIZE))
+
+#define MBUF_CACHE_SIZE 128
+#define MBUF_SIZE (2048 + sizeof(struct rte_mbuf) + RTE_PKTMBUF_HEADROOM)
+
+#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* Defines how long we wait between retries on RX */
+#define BURST_RX_WAIT_US 15
+
+#define BURST_RX_RETRIES 4 /* Number of retries on RX. */
+
+#define JUMBO_FRAME_MAX_SIZE 0x2600
+
+/* State of virtio device. */
+#define DEVICE_MAC_LEARNING 0
+#define DEVICE_RX 1
+#define DEVICE_SAFE_REMOVE 2
+
+/* Config_core_flag status definitions. */
+#define REQUEST_DEV_REMOVAL 1
+#define ACK_DEV_REMOVAL 0
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+/* Get first 4 bytes in mbuf headroom. */
+#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \
+ + sizeof(struct rte_mbuf)))
+
+#define INVALID_PORT_ID 0xFF
+
+/* Size of buffers used for snprintfs. */
+#define MAX_PRINT_BUFF 6072
+
+/* Maximum character device basename size. */
+#define MAX_BASENAME_SZ 20
+
+/* Maximum long option length for option parsing. */
+#define MAX_LONG_OPT_SZ 64
+
+/* Used to compare MAC addresses. */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
+
+#define CMD_LINE_OPT_NB_DEVICES "nb-devices"
+#define CMD_LINE_OPT_UDP_PORT "udp-port"
+#define CMD_LINE_OPT_TX_CHECKSUM "tx-checksum"
+#define CMD_LINE_OPT_TSO_SEGSZ "tso-segsz"
+#define CMD_LINE_OPT_FILTER_TYPE "filter-type"
+#define CMD_LINE_OPT_ENCAP "encap"
+#define CMD_LINE_OPT_DECAP "decap"
+#define CMD_LINE_OPT_RX_RETRY "rx-retry"
+#define CMD_LINE_OPT_RX_RETRY_DELAY "rx-retry-delay"
+#define CMD_LINE_OPT_RX_RETRY_NUM "rx-retry-num"
+#define CMD_LINE_OPT_STATS "stats"
+#define CMD_LINE_OPT_DEV_BASENAME "dev-basename"
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+
+/* Number of switching cores enabled */
+static uint32_t nb_switching_cores;
+
+/* Number of devices/queues to support */
+uint16_t nb_devices = 2;
+
+/* Max number of ring descriptors; ixgbe, i40e and e1000 all support 4096. */
+#define MAX_RING_DESC 4096
+
+struct vpool {
+ struct rte_mempool *pool;
+ struct rte_ring *ring;
+ uint32_t buf_size;
+} vpool_array[MAX_QUEUES+MAX_QUEUES];
+
+/* UDP tunneling port */
+uint16_t udp_port = 4789;
+
+/* enable/disable inner TX checksum */
+uint8_t tx_checksum = 0;
+
+/* TCP segment size */
+uint16_t tso_segsz = 0;
+
+/* enable/disable decapsulation */
+uint8_t rx_decap = 1;
+
+/* enable/disable encapsulation */
+uint8_t tx_encap = 1;
+
+/* RX filter type for tunneling packet */
+uint8_t filter_idx = 1;
+
+/* overlay packet operation */
+struct ol_switch_ops overlay_options = {
+ .port_configure = vxlan_port_init,
+ .tunnel_setup = vxlan_link,
+ .tunnel_destroy = vxlan_unlink,
+ .tx_handle = vxlan_tx_pkts,
+ .rx_handle = vxlan_rx_pkts,
+ .param_handle = NULL,
+};
+
+/* Enable stats. */
+uint32_t enable_stats = 0;
+/* Enable retries on RX. */
+static uint32_t enable_retry = 1;
+/* Specify timeout (in microseconds) between retries on RX. */
+static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
+/* Specify the number of retries on RX. */
+static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
+
+/* Character device basename. Can be set by user. */
+static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
+
+static unsigned lcore_ids[RTE_MAX_LCORE];
+uint8_t ports[RTE_MAX_ETHPORTS];
+
+static unsigned nb_ports; /**< The number of ports specified in command line */
+
+/* ethernet addresses of ports */
+struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* heads for the main used and free linked lists for the data path. */
+static struct virtio_net_data_ll *ll_root_used;
+static struct virtio_net_data_ll *ll_root_free;
+
+/**
+ * Array of data core structures containing information on
+ * individual core linked lists.
+ */
+static struct lcore_info lcore_info[RTE_MAX_LCORE];
+
+/* Used for queueing bursts of TX packets. */
+struct mbuf_table {
+ unsigned len;
+ unsigned txq_id;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+/* TX queue for each data core. */
+struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
+
+struct device_statistics dev_statistics[MAX_DEVICES];
+
+/**
+ * Set character device basename.
+ */
+static int
+us_vhost_parse_basename(const char *q_arg)
+{
+ /* parse the basename string */
+ if (strlen(q_arg) >= MAX_BASENAME_SZ)
+ return -1;
+ else
+ snprintf((char *)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg);
+
+ return 0;
+}
+
+/**
+ * Parse the portmask provided at run time.
+ */
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+/**
+ * Parse num options at run time.
+ */
+static int
+parse_num_opt(const char *q_arg, uint32_t max_valid_value)
+{
+ char *end = NULL;
+ unsigned long num;
+
+ /* parse unsigned int string */
+ num = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (num > max_valid_value)
+ return -1;
+
+ return num;
+}
+
+/**
+ * Display usage
+ */
+static void
+tep_termination_usage(const char *prgname)
+{
+ RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
+ " --udp-port: UDP destination port for VXLAN packet\n"
+ " --nb-devices[1-64]: The number of virtIO device\n"
+ " --tx-checksum [0|1]: inner Tx checksum offload\n"
+ " --tso-segsz [0-N]: TCP segment size\n"
+ " --decap [0|1]: tunneling packet decapsulation\n"
+ " --encap [0|1]: tunneling packet encapsulation\n"
+ " --filter-type[1-3]: filter type for tunneling packet\n"
+ " 1: Inner MAC and tenent ID\n"
+ " 2: Inner MAC and VLAN, and tenent ID\n"
+ " 3: Outer MAC, Inner MAC and tenent ID\n"
+ " -p PORTMASK: Set mask for ports to be used by application\n"
+ " --rx-retry [0|1]: disable/enable(default) retries on rx."
+ " Enable retry if destintation queue is full\n"
+ " --rx-retry-delay [0-N]: timeout(in usecond) between retries on RX."
+ " This makes effect only if retries on rx enabled\n"
+ " --rx-retry-num [0-N]: the number of retries on rx."
+ " This makes effect only if retries on rx enabled\n"
+ " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
+ " --dev-basename: The basename to be used for the character device.\n",
+ prgname);
+}
+
+/**
+ * Parse the arguments given in the command line of the application.
+ */
+static int
+tep_termination_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ int option_index;
+ unsigned i;
+ const char *prgname = argv[0];
+ static struct option long_option[] = {
+ {CMD_LINE_OPT_NB_DEVICES, required_argument, NULL, 0},
+ {CMD_LINE_OPT_UDP_PORT, required_argument, NULL, 0},
+ {CMD_LINE_OPT_TX_CHECKSUM, required_argument, NULL, 0},
+ {CMD_LINE_OPT_TSO_SEGSZ, required_argument, NULL, 0},
+ {CMD_LINE_OPT_DECAP, required_argument, NULL, 0},
+ {CMD_LINE_OPT_ENCAP, required_argument, NULL, 0},
+ {CMD_LINE_OPT_FILTER_TYPE, required_argument, NULL, 0},
+ {CMD_LINE_OPT_RX_RETRY, required_argument, NULL, 0},
+ {CMD_LINE_OPT_RX_RETRY_DELAY, required_argument, NULL, 0},
+ {CMD_LINE_OPT_RX_RETRY_NUM, required_argument, NULL, 0},
+ {CMD_LINE_OPT_STATS, required_argument, NULL, 0},
+ {CMD_LINE_OPT_DEV_BASENAME, required_argument, NULL, 0},
+ {NULL, 0, 0, 0},
+ };
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:",
+ long_option, &option_index)) != EOF) {
+ switch (opt) {
+ /* Portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid portmask\n");
+ tep_termination_usage(prgname);
+ return -1;
+ }
+ break;
+ case 0:
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_NB_DEVICES,
+ sizeof(CMD_LINE_OPT_NB_DEVICES))) {
+ ret = parse_num_opt(optarg, MAX_DEVICES);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for nb-devices [0-%d]\n",
+ MAX_DEVICES);
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ nb_devices = ret;
+ }
+
+ /* Enable/disable retries on RX. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_RX_RETRY,
+ sizeof(CMD_LINE_OPT_RX_RETRY))) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for rx-retry [0|1]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ enable_retry = ret;
+ }
+
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_TSO_SEGSZ,
+ sizeof(CMD_LINE_OPT_TSO_SEGSZ))) {
+ ret = parse_num_opt(optarg, INT16_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for TCP segment size [0-N]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ tso_segsz = ret;
+ }
+
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_UDP_PORT,
+ sizeof(CMD_LINE_OPT_UDP_PORT))) {
+ ret = parse_num_opt(optarg, INT16_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for UDP port [0-N]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ udp_port = ret;
+ }
+
+ /* Specify the retry delay time (in microseconds) on RX.*/
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_RX_RETRY_DELAY,
+ sizeof(CMD_LINE_OPT_RX_RETRY_DELAY))) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for rx-retry-delay [0-N]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ burst_rx_delay_time = ret;
+ }
+
+ /* Specify the retries number on RX. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_RX_RETRY_NUM,
+ sizeof(CMD_LINE_OPT_RX_RETRY_NUM))) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for rx-retry-num [0-N]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ burst_rx_retry_num = ret;
+ }
+
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_TX_CHECKSUM,
+ sizeof(CMD_LINE_OPT_TX_CHECKSUM))) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for tx-checksum [0|1]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ tx_checksum = ret;
+ }
+
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_FILTER_TYPE,
+ sizeof(CMD_LINE_OPT_FILTER_TYPE))) {
+ ret = parse_num_opt(optarg, 3);
+ if ((ret == -1) || (ret == 0)) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for filter type [1-3]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ filter_idx = ret - 1;
+ }
+
+ /* Enable/disable encapsulation on RX. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_DECAP,
+ sizeof(CMD_LINE_OPT_DECAP))) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for decap [0|1]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ rx_decap = ret;
+ }
+
+ /* Enable/disable encapsulation on TX. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_ENCAP,
+ sizeof(CMD_LINE_OPT_ENCAP))) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for encap [0|1]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ tx_encap = ret;
+ }
+
+ /* Enable/disable stats. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_STATS,
+ sizeof(CMD_LINE_OPT_STATS))) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for stats [0..N]\n");
+ tep_termination_usage(prgname);
+ return -1;
+ } else
+ enable_stats = ret;
+ }
+
+ /* Set character device basename. */
+ if (!strncmp(long_option[option_index].name,
+ CMD_LINE_OPT_DEV_BASENAME,
+ sizeof(CMD_LINE_OPT_DEV_BASENAME))) {
+ if (us_vhost_parse_basename(optarg) == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for character "
+ "device basename (Max %d characters)\n",
+ MAX_BASENAME_SZ);
+ tep_termination_usage(prgname);
+ return -1;
+ }
+ }
+
+ break;
+
+ /* Invalid option - print options. */
+ default:
+ tep_termination_usage(prgname);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (enabled_port_mask & (1 << i))
+ ports[nb_ports++] = (uint8_t)i;
+ }
+
+ if ((nb_ports == 0) || (nb_ports > MAX_SUP_PORTS)) {
+ RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
+ "but only %u port can be enabled\n", nb_ports,
+ MAX_SUP_PORTS);
+ return -1;
+ }
+
+ return 0;
+}
+
+/**
+ * Update the global variable nb_ports and the ports array according to
+ * the number of ports in the system, and return the number of valid ports
+ */
+static unsigned
+check_ports_num(unsigned max_nb_ports)
+{
+ unsigned valid_nb_ports = nb_ports;
+ unsigned portid;
+
+ if (nb_ports > max_nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) "
+ " exceeds total system port number(%u)\n",
+ nb_ports, max_nb_ports);
+ nb_ports = max_nb_ports;
+ }
+
+ for (portid = 0; portid < nb_ports; portid++) {
+ if (ports[portid] >= max_nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT,
+ "\nSpecified port ID(%u) exceeds max "
+ " system port ID(%u)\n",
+ ports[portid], (max_nb_ports - 1));
+ ports[portid] = INVALID_PORT_ID;
+ valid_nb_ports--;
+ }
+ }
+ return valid_nb_ports;
+}
+
+/**
+ * This function routes the TX packet to the correct interface. This may be a local device
+ * or the physical port.
+ */
+static inline void __attribute__((always_inline))
+virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m)
+{
+ struct mbuf_table *tx_q;
+ struct rte_mbuf **m_table;
+ unsigned len, ret = 0;
+ const uint16_t lcore_id = rte_lcore_id();
+ struct virtio_net *dev = vdev->dev;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n",
+ dev->device_fh);
+
+ /* Add packet to the port tx queue */
+ tx_q = &lcore_tx_queue[lcore_id];
+ len = tx_q->len;
+
+ tx_q->m_table[len] = m;
+ len++;
+ if (enable_stats) {
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx++;
+ }
+
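+ /* Flush the queue through the overlay TX handler once a full burst
+ * has accumulated. */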
+ if (unlikely(len == MAX_PKT_BURST)) {
+ m_table = (struct rte_mbuf **)tx_q->m_table;
+ ret = overlay_options.tx_handle(ports[0],
+ (uint16_t)tx_q->txq_id, m_table,
+ (uint16_t)tx_q->len);
+
+ /* Free any buffers not handled by TX and update
+ * the port stats.
+ */
+ if (unlikely(ret < len)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < len);
+ }
+
+ len = 0;
+ }
+
+ tx_q->len = len;
+ return;
+}
+
+/**
+ * This function is called by each data core. It handles all
+ * RX/TX registered with the core. For TX the specific lcore
+ * linked list is used. For RX, MAC addresses are compared
+ * with all devices in the main linked list.
+ */
+static int
+switch_worker(__rte_unused void *arg)
+{
+ struct rte_mempool *mbuf_pool = arg;
+ struct virtio_net *dev = NULL;
+ struct vhost_dev *vdev = NULL;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct virtio_net_data_ll *dev_ll;
+ struct mbuf_table *tx_q;
+ volatile struct lcore_ll_info *lcore_ll;
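+ /* TX drain interval in TSC cycles: cycles per microsecond (rounded up)
+ * multiplied by BURST_TX_DRAIN_US. */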
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1)
+ / US_PER_S * BURST_TX_DRAIN_US;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
+ unsigned i, ret = 0;
+ const uint16_t lcore_id = rte_lcore_id();
+ const uint16_t num_cores = (uint16_t)rte_lcore_count();
+ uint16_t rx_count = 0;
+ uint16_t tx_count;
+ uint32_t retry = 0;
+
+ RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started\n", lcore_id);
+ lcore_ll = lcore_info[lcore_id].lcore_ll;
+ prev_tsc = 0;
+
+ tx_q = &lcore_tx_queue[lcore_id];
+ for (i = 0; i < num_cores; i++) {
+ if (lcore_ids[i] == lcore_id) {
+ tx_q->txq_id = i;
+ break;
+ }
+ }
+
+ while (1) {
+ cur_tsc = rte_rdtsc();
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ if (tx_q->len) {
+ LOG_DEBUG(VHOST_DATA, "TX queue drained after "
+ "timeout with burst size %u\n",
+ tx_q->len);
+ ret = overlay_options.tx_handle(ports[0],
+ (uint16_t)tx_q->txq_id,
+ (struct rte_mbuf **)tx_q->m_table,
+ (uint16_t)tx_q->len);
+ if (unlikely(ret < tx_q->len)) {
+ do {
+ rte_pktmbuf_free(tx_q->m_table[ret]);
+ } while (++ret < tx_q->len);
+ }
+
+ tx_q->len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+
+ }
+
+ rte_prefetch0(lcore_ll->ll_root_used);
+
+ /*
+ * If the configuration core has requested a device removal,
+ * acknowledge that this core has left the linked list and is
+ * no longer using any device.
+ */
+ if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
+ lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+
+ /*
+ * Process devices
+ */
+ dev_ll = lcore_ll->ll_root_used;
+
+ while (dev_ll != NULL) {
+ vdev = dev_ll->vdev;
+ dev = vdev->dev;
+
+ if (unlikely(vdev->remove)) {
+ dev_ll = dev_ll->next;
+ overlay_options.tunnel_destroy(vdev);
+ vdev->ready = DEVICE_SAFE_REMOVE;
+ continue;
+ }
+ if (likely(vdev->ready == DEVICE_RX)) {
+ /* Handle guest RX */
+ rx_count = rte_eth_rx_burst(ports[0],
+ vdev->rx_q, pkts_burst, MAX_PKT_BURST);
+
+ if (rx_count) {
+ /*
+ * If retry is enabled and the virtio queue is full,
+ * wait and retry to avoid packet loss. MAX_PKT_BURST
+ * must be less than the virtio queue size.
+ */
+ if (enable_retry && unlikely(rx_count >
+ rte_vring_available_entries(dev, VIRTIO_RXQ))) {
+ for (retry = 0; retry < burst_rx_retry_num;
+ retry++) {
+ rte_delay_us(burst_rx_delay_time);
+ if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
+ break;
+ }
+ }
+
+ ret_count = overlay_options.rx_handle(dev, pkts_burst, rx_count);
+ if (enable_stats) {
+ rte_atomic64_add(
+ &dev_statistics[dev->device_fh].rx_total_atomic,
+ rx_count);
+ rte_atomic64_add(
+ &dev_statistics[dev->device_fh].rx_atomic, ret_count);
+ }
+ while (likely(rx_count)) {
+ rx_count--;
+ rte_pktmbuf_free(pkts_burst[rx_count]);
+ }
+
+ }
+ }
+
+ if (likely(!vdev->remove)) {
+ /* Handle guest TX*/
+ tx_count = rte_vhost_dequeue_burst(dev,
+ VIRTIO_TXQ, mbuf_pool,
+ pkts_burst, MAX_PKT_BURST);
+ /* If this is the first received packet we need to learn the MAC */
+ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
+ if (vdev->remove ||
+ (overlay_options.tunnel_setup(vdev, pkts_burst[0]) == -1)) {
+ while (tx_count)
+ rte_pktmbuf_free(pkts_burst[--tx_count]);
+ }
+ }
+ while (tx_count)
+ virtio_tx_route(vdev, pkts_burst[--tx_count]);
+ }
+
+ /* move to the next device in the list */
+ dev_ll = dev_ll->next;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * Add an entry to a used linked list. A free entry must first be found
+ * in the free linked list using get_data_ll_free_entry();
+ */
+static void
+add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ /* Set next to NULL and use a compiler barrier so the write is not
+ * reordered past the store that links the entry into the list. */
+ ll_dev->next = NULL;
+ rte_compiler_barrier();
+
+ /* If ll == NULL then this is the first device. */
+ if (ll) {
+ /* Increment to the tail of the linked list. */
+ while (ll->next != NULL)
+ ll = ll->next;
+
+ ll->next = ll_dev;
+ } else {
+ *ll_root_addr = ll_dev;
+ }
+}
+
+/**
+ * Remove an entry from a used linked list. The entry must then be added to
+ * the free linked list using put_data_ll_free_entry().
+ */
+static void
+rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev,
+ struct virtio_net_data_ll *ll_dev_last)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ if (unlikely((ll == NULL) || (ll_dev == NULL)))
+ return;
+
+ if (ll_dev == ll)
+ *ll_root_addr = ll_dev->next;
+ else
+ if (likely(ll_dev_last != NULL))
+ ll_dev_last->next = ll_dev->next;
+ else
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Remove entry form ll failed.\n");
+}
+
+/**
+ * Find and return an entry from the free linked list.
+ */
+static struct virtio_net_data_ll *
+get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+ struct virtio_net_data_ll *ll_dev;
+
+ if (ll_free == NULL)
+ return NULL;
+
+ ll_dev = ll_free;
+ *ll_root_addr = ll_free->next;
+
+ return ll_dev;
+}
+
+/**
+ * Place an entry back on to the free linked list.
+ */
+static void
+put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+
+ if (ll_dev == NULL)
+ return;
+
+ ll_dev->next = ll_free;
+ *ll_root_addr = ll_dev;
+}
+
+/**
+ * Creates a linked list of a given size.
+ */
+static struct virtio_net_data_ll *
+alloc_data_ll(uint32_t size)
+{
+ struct virtio_net_data_ll *ll_new;
+ uint32_t i;
+
+ /* Malloc and then chain the linked list. */
+ ll_new = malloc(size * sizeof(struct virtio_net_data_ll));
+ if (ll_new == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to allocate memory for ll_new.\n");
+ return NULL;
+ }
+
+ for (i = 0; i < size - 1; i++) {
+ ll_new[i].vdev = NULL;
+ ll_new[i].next = &ll_new[i+1];
+ }
+ ll_new[i].next = NULL;
+
+ return ll_new;
+}
+
+/**
+ * Create the main linked list along with each individual core's
+ * linked list. A used and a free list are created to manage entries.
+ */
+static int
+init_data_ll(void)
+{
+ int lcore;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll =
+ malloc(sizeof(struct lcore_ll_info));
+ if (lcore_info[lcore].lcore_ll == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Failed to allocate memory for lcore_ll.\n");
+ return -1;
+ }
+
+ lcore_info[lcore].lcore_ll->device_num = 0;
+ lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+ lcore_info[lcore].lcore_ll->ll_root_used = NULL;
+ if (nb_devices % nb_switching_cores)
+ lcore_info[lcore].lcore_ll->ll_root_free =
+ alloc_data_ll((nb_devices / nb_switching_cores)
+ + 1);
+ else
+ lcore_info[lcore].lcore_ll->ll_root_free =
+ alloc_data_ll(nb_devices / nb_switching_cores);
+ }
+
+ /* Allocate devices up to a maximum of MAX_DEVICES. */
+ ll_root_free = alloc_data_ll(MIN((nb_devices), MAX_DEVICES));
+
+ return 0;
+}
+
+/**
+ * Remove a device from the specific data core linked list and
+ * from the main linked list. Synchronization occurs through the use
+ * of the lcore dev_removal_flag. Device is made volatile here
+ * to avoid re-ordering of dev->remove=1 which can cause an infinite
+ * loop in the rte_pause loop.
+ */
+static void
+destroy_device(volatile struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_lcore_dev_cur;
+ struct virtio_net_data_ll *ll_main_dev_cur;
+ struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
+ struct virtio_net_data_ll *ll_main_dev_last = NULL;
+ struct vhost_dev *vdev;
+ int lcore;
+
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+
+ vdev = (struct vhost_dev *)dev->priv;
+
+ /* set the remove flag. */
+ vdev->remove = 1;
+ while (vdev->ready != DEVICE_SAFE_REMOVE)
+ rte_pause();
+
+ /* Search for entry to be removed from lcore ll */
+ ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
+ while (ll_lcore_dev_cur != NULL) {
+ if (ll_lcore_dev_cur->vdev == vdev) {
+ break;
+ } else {
+ ll_lcore_dev_last = ll_lcore_dev_cur;
+ ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+ }
+ }
+
+ if (ll_lcore_dev_cur == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%"PRIu64") Failed to find the dev to be destroy.\n",
+ dev->device_fh);
+ return;
+ }
+
+ /* Search for entry to be removed from main ll */
+ ll_main_dev_cur = ll_root_used;
+ ll_main_dev_last = NULL;
+ while (ll_main_dev_cur != NULL) {
+ if (ll_main_dev_cur->vdev == vdev) {
+ break;
+ } else {
+ ll_main_dev_last = ll_main_dev_cur;
+ ll_main_dev_cur = ll_main_dev_cur->next;
+ }
+ }
+
+ /* Remove entries from the lcore and main ll. */
+ rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,
+ ll_lcore_dev_cur, ll_lcore_dev_last);
+ rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
+
+ /* Set the dev_removal_flag on each lcore. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll->dev_removal_flag =
+ REQUEST_DEV_REMOVAL;
+ }
+
+ /*
+ * Once each core has set the dev_removal_flag to
+ * ACK_DEV_REMOVAL we can be sure that they can no longer access
+ * the device removed from the linked lists and that the devices
+ * are no longer in use.
+ */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ while (lcore_info[lcore].lcore_ll->dev_removal_flag
+ != ACK_DEV_REMOVAL)
+ rte_pause();
+ }
+
+ /* Add the entries back to the lcore and main free ll.*/
+ put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free,
+ ll_lcore_dev_cur);
+ put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+
+ /* Decrement number of device on the lcore. */
+ lcore_info[vdev->coreid].lcore_ll->device_num--;
+
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed "
+ "from data core\n", dev->device_fh);
+
+ rte_free(vdev);
+
+}
+
+/**
+ * A new device is added to a data core. First the device is added
+ * to the main linked list and then allocated to a specific data core.
+ */
+static int
+new_device(struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_dev;
+ int lcore, core_add = 0;
+ uint32_t device_num_min = nb_devices;
+ struct vhost_dev *vdev;
+
+ vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
+ if (vdev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%"PRIu64") Couldn't allocate memory for vhost dev\n",
+ dev->device_fh);
+ return -1;
+ }
+ vdev->dev = dev;
+ dev->priv = vdev;
+ /* Add device to main ll */
+ ll_dev = get_data_ll_free_entry(&ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in"
+ " linked list Device limit of %d devices per core"
+ " has been reached\n", dev->device_fh, nb_devices);
+ if (vdev->regions_hpa)
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+ ll_dev->vdev = vdev;
+ add_data_ll_entry(&ll_root_used, ll_dev);
+ vdev->rx_q = dev->device_fh;
+
+ /* reset ready flag */
+ vdev->ready = DEVICE_MAC_LEARNING;
+ vdev->remove = 0;
+
+ /* Find the data core with the fewest devices and add the device to it. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
+ device_num_min = lcore_info[lcore].lcore_ll->device_num;
+ core_add = lcore;
+ }
+ }
+ /* Add device to lcore ll */
+ ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%"PRIu64") Failed to add device to data core\n",
+ dev->device_fh);
+ vdev->ready = DEVICE_SAFE_REMOVE;
+ destroy_device(dev);
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+ ll_dev->vdev = vdev;
+ vdev->coreid = core_add;
+
+ add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used,
+ ll_dev);
+
+ /* Initialize device stats */
+ memset(&dev_statistics[dev->device_fh], 0,
+ sizeof(struct device_statistics));
+
+ /* Disable notifications. */
+ rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ lcore_info[vdev->coreid].lcore_ll->device_num++;
+ dev->flags |= VIRTIO_DEV_RUNNING;
+
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n",
+ dev->device_fh, vdev->coreid);
+
+ return 0;
+}
+
+/**
+ * These callbacks allow devices to be added to the data core when
+ * configuration has been fully completed.
+ */
+static const struct virtio_net_device_ops virtio_net_device_ops = {
+ .new_device = new_device,
+ .destroy_device = destroy_device,
+};
+
+/**
+ * This thread wakes up periodically to print stats if the user has
+ * enabled them.
+ */
+static void
+print_stats(void)
+{
+ struct virtio_net_data_ll *dev_ll;
+ uint64_t tx_dropped, rx_dropped;
+ uint64_t tx, tx_total, rx, rx_total, rx_ip_csum, rx_l4_csum;
+ uint32_t device_fh;
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char top_left[] = { 27, '[', '1', ';', '1', 'H', '\0' };
+
+ while (1) {
+ sleep(enable_stats);
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, top_left);
+
+ printf("\nDevice statistics ================================");
+
+ dev_ll = ll_root_used;
+ while (dev_ll != NULL) {
+ device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
+ tx_total = dev_statistics[device_fh].tx_total;
+ tx = dev_statistics[device_fh].tx;
+ tx_dropped = tx_total - tx;
+
+ rx_total = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_total_atomic);
+ rx = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_atomic);
+ rx_dropped = rx_total - rx;
+ rx_ip_csum = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_bad_ip_csum);
+ rx_l4_csum = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_bad_l4_csum);
+
+ printf("\nStatistics for device %"PRIu32" ----------"
+ "\nTX total: %"PRIu64""
+ "\nTX dropped: %"PRIu64""
+ "\nTX successful: %"PRIu64""
+ "\nRX total: %"PRIu64""
+ "\nRX bad IP csum: %"PRIu64""
+ "\nRX bad L4 csum: %"PRIu64""
+ "\nRX dropped: %"PRIu64""
+ "\nRX successful: %"PRIu64"",
+ device_fh,
+ tx_total,
+ tx_dropped,
+ tx,
+ rx_total,
+ rx_ip_csum,
+ rx_l4_csum,
+ rx_dropped,
+ rx);
+
+ dev_ll = dev_ll->next;
+ }
+ printf("\n================================================\n");
+ }
+}
+
+/**
+ * Main function, does initialisation and calls the per-lcore functions. The CUSE
+ * device is also registered here to handle the IOCTLs.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool = NULL;
+ unsigned lcore_id, core_id = 0;
+ unsigned nb_ports, valid_nb_ports;
+ int ret;
+ uint8_t portid;
+ uint16_t queue_id;
+ static pthread_t tid;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse app arguments */
+ ret = tep_termination_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid argument\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+ if (rte_lcore_is_enabled(lcore_id))
+ lcore_ids[core_id++] = lcore_id;
+
+ /* set the number of switching cores available */
+ nb_switching_cores = rte_lcore_count()-1;
+
+ /* Get the number of physical ports. */
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+ * Update the global variable nb_ports and the global ports array, and
+ * get the number of valid ports according to the number of system ports.
+ */
+ valid_nb_ports = check_ports_num(nb_ports);
+
+ if ((valid_nb_ports == 0) || (valid_nb_ports > MAX_SUP_PORTS)) {
+ rte_exit(EXIT_FAILURE, "Current enabled port number is %u,"
+ "but only %u port can be enabled\n", nb_ports,
+ MAX_SUP_PORTS);
+ }
+ /* Create the mbuf pool. */
+ mbuf_pool = rte_mempool_create(
+ "MBUF_POOL",
+ NUM_MBUFS_PER_PORT
+ * valid_nb_ports,
+ MBUF_SIZE, MBUF_CACHE_SIZE,
+ sizeof(struct rte_pktmbuf_pool_private),
+ rte_pktmbuf_pool_init, NULL,
+ rte_pktmbuf_init, NULL,
+ rte_socket_id(), 0);
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++)
+ vpool_array[queue_id].pool = mbuf_pool;
+
+ /* Set log level. */
+ rte_set_log_level(LOG_LEVEL);
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ RTE_LOG(INFO, VHOST_PORT,
+ "Skipping disabled port %d\n", portid);
+ continue;
+ }
+ if (overlay_options.port_configure(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot initialize network ports\n");
+ }
+
+ /* Initialise all linked lists. */
+ if (init_data_ll() == -1)
+ rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");
+
+ /* Initialize device stats */
+ memset(&dev_statistics, 0, sizeof(dev_statistics));
+
+ /* Enable stats if the user option is set. */
+ if (enable_stats) {
+ ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "Cannot create print-stats thread\n");
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
+ ret = rte_thread_setname(tid, thread_name);
+ if (ret != 0)
+ RTE_LOG(ERR, VHOST_CONFIG, "Cannot set print-stats name\n");
+ }
+
+ /* Launch all data cores. */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(switch_worker,
+ mbuf_pool, lcore_id);
+ }
+ rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
+
+ /* Register CUSE device to handle IOCTLs. */
+ ret = rte_vhost_driver_register((char *)&dev_basename);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "CUSE device setup failure.\n");
+
+ rte_vhost_driver_callback_register(&virtio_net_device_ops);
+
+ /* Start CUSE session. */
+ rte_vhost_driver_session_start();
+
+ return 0;
+}
diff --git a/examples/tep_termination/main.h b/examples/tep_termination/main.h
new file mode 100644
index 00000000..a34301ad
--- /dev/null
+++ b/examples/tep_termination/main.h
@@ -0,0 +1,129 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+#include <rte_ether.h>
+
+#ifdef DEBUG
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#endif
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2
+#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3
+
+/* State of virtio device. */
+#define DEVICE_MAC_LEARNING 0
+#define DEVICE_RX 1
+#define DEVICE_SAFE_REMOVE 2
+
+#define MAX_QUEUES 512
+
+/* Max burst size for RX/TX */
+#define MAX_PKT_BURST 32
+
+/* Max number of devices. Limited by the application. */
+#define MAX_DEVICES 64
+
+/* Per-device statistics struct */
+struct device_statistics {
+ uint64_t tx_total;
+ rte_atomic64_t rx_total_atomic;
+ uint64_t rx_total;
+ uint64_t tx;
+ rte_atomic64_t rx_atomic;
+ /**< Bad inner IP csum for tunneling pkt */
+ rte_atomic64_t rx_bad_ip_csum;
+ /**< Bad inner L4 csum for tunneling pkt */
+ rte_atomic64_t rx_bad_l4_csum;
+} __rte_cache_aligned;
+
+/**
+ * Device linked list structure for data path.
+ */
+struct vhost_dev {
+ /**< Pointer to device created by vhost lib. */
+ struct virtio_net *dev;
+ /**< Number of memory regions for gpa to hpa translation. */
+ uint32_t nregions_hpa;
+ /**< Memory region information for gpa to hpa translation. */
+ struct virtio_memory_regions_hpa *regions_hpa;
+ /**< Device MAC address (Obtained on first TX packet). */
+ struct ether_addr mac_address;
+ /**< RX queue number. */
+ uint16_t rx_q;
+ /**< Data core that the device is added to. */
+ uint16_t coreid;
+ /**< A device is set as ready if the MAC address has been set. */
+ volatile uint8_t ready;
+ /**< Device is marked for removal from the data core. */
+ volatile uint8_t remove;
+} __rte_cache_aligned;
+
+/**
+ * Structure containing data core specific information.
+ */
+struct lcore_ll_info {
+ /**< Pointer to head in free linked list. */
+ struct virtio_net_data_ll *ll_root_free;
+ /**< Pointer to head of used linked list. */
+ struct virtio_net_data_ll *ll_root_used;
+ /**< Number of devices on lcore. */
+ uint32_t device_num;
+ /**< Flag to synchronize device removal. */
+ volatile uint8_t dev_removal_flag;
+};
+
+struct lcore_info {
+ /**< Pointer to data core specific lcore_ll_info struct */
+ struct lcore_ll_info *lcore_ll;
+};
+
+struct virtio_net_data_ll {
+ /**< Pointer to device created by configuration core. */
+ struct vhost_dev *vdev;
+ /**< Pointer to next device in linked list. */
+ struct virtio_net_data_ll *next;
+};
+
+uint32_t
+virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count);
+
+#endif /* _MAIN_H_ */
diff --git a/examples/tep_termination/vxlan.c b/examples/tep_termination/vxlan.c
new file mode 100644
index 00000000..5ee1f956
--- /dev/null
+++ b/examples/tep_termination/vxlan.c
@@ -0,0 +1,259 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#include <stdint.h>
+#include <rte_mbuf.h>
+#include <rte_hash_crc.h>
+#include <rte_byteorder.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+#include <rte_sctp.h>
+
+#include "main.h"
+#include "vxlan.h"
+
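+/*
+ * Return the pseudo-header checksum that hardware L4 checksum offload
+ * expects to find in the L4 checksum field before transmission.
+ */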
+static uint16_t
+get_psd_sum(void *l3_hdr, uint16_t ethertype, uint64_t ol_flags)
+{
+ if (ethertype == ETHER_TYPE_IPv4)
+ return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
+ else /* assume ethertype == ETHER_TYPE_IPv6 */
+ return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
+}
+
+/**
+ * Parse an Ethernet header to fill the outer_l2_len and outer_l3_len fields
+ * and return the L4 protocol. The function recognizes IPv4/IPv6 with one
+ * optional VLAN header.
+ */
+static void
+parse_ethernet(struct ether_hdr *eth_hdr, union tunnel_offload_info *info,
+ uint8_t *l4_proto)
+{
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ uint16_t ethertype;
+
+ info->outer_l2_len = sizeof(struct ether_hdr);
+ ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+ if (ethertype == ETHER_TYPE_VLAN) {
+ struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+ info->outer_l2_len += sizeof(struct vlan_hdr);
+ ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+ }
+
+ switch (ethertype) {
+ case ETHER_TYPE_IPv4:
+ ipv4_hdr = (struct ipv4_hdr *)
+ ((char *)eth_hdr + info->outer_l2_len);
+ info->outer_l3_len = sizeof(struct ipv4_hdr);
+ *l4_proto = ipv4_hdr->next_proto_id;
+ break;
+ case ETHER_TYPE_IPv6:
+ ipv6_hdr = (struct ipv6_hdr *)
+ ((char *)eth_hdr + info->outer_l2_len);
+ info->outer_l3_len = sizeof(struct ipv6_hdr);
+ *l4_proto = ipv6_hdr->proto;
+ break;
+ default:
+ info->outer_l3_len = 0;
+ *l4_proto = 0;
+ break;
+ }
+}
+
+/**
+ * Prepare the inner headers and offload flags so that the checksums are calculated in hardware
+ */
+static uint64_t
+process_inner_cksums(struct ether_hdr *eth_hdr, union tunnel_offload_info *info)
+{
+ void *l3_hdr = NULL;
+ uint8_t l4_proto;
+ uint16_t ethertype;
+ struct ipv4_hdr *ipv4_hdr;
+ struct ipv6_hdr *ipv6_hdr;
+ struct udp_hdr *udp_hdr;
+ struct tcp_hdr *tcp_hdr;
+ struct sctp_hdr *sctp_hdr;
+ uint64_t ol_flags = 0;
+
+ info->l2_len = sizeof(struct ether_hdr);
+ ethertype = rte_be_to_cpu_16(eth_hdr->ether_type);
+
+ if (ethertype == ETHER_TYPE_VLAN) {
+ struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1);
+ info->l2_len += sizeof(struct vlan_hdr);
+ ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto);
+ }
+
+ l3_hdr = (char *)eth_hdr + info->l2_len;
+
+ if (ethertype == ETHER_TYPE_IPv4) {
+ ipv4_hdr = (struct ipv4_hdr *)l3_hdr;
+ ipv4_hdr->hdr_checksum = 0;
+ ol_flags |= PKT_TX_IPV4;
+ ol_flags |= PKT_TX_IP_CKSUM;
+ info->l3_len = sizeof(struct ipv4_hdr);
+ l4_proto = ipv4_hdr->next_proto_id;
+ } else if (ethertype == ETHER_TYPE_IPv6) {
+ ipv6_hdr = (struct ipv6_hdr *)l3_hdr;
+ info->l3_len = sizeof(struct ipv6_hdr);
+ l4_proto = ipv6_hdr->proto;
+ ol_flags |= PKT_TX_IPV6;
+ } else
+ return 0; /* packet type not supported, nothing to do */
+
+ if (l4_proto == IPPROTO_UDP) {
+ udp_hdr = (struct udp_hdr *)((char *)l3_hdr + info->l3_len);
+ ol_flags |= PKT_TX_UDP_CKSUM;
+ udp_hdr->dgram_cksum = get_psd_sum(l3_hdr,
+ ethertype, ol_flags);
+ } else if (l4_proto == IPPROTO_TCP) {
+ tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + info->l3_len);
+ ol_flags |= PKT_TX_TCP_CKSUM;
+ tcp_hdr->cksum = get_psd_sum(l3_hdr, ethertype,
+ ol_flags);
+ if (tso_segsz != 0) {
+ ol_flags |= PKT_TX_TCP_SEG;
+ info->tso_segsz = tso_segsz;
+ info->l4_len = sizeof(struct tcp_hdr);
+ }
+
+ } else if (l4_proto == IPPROTO_SCTP) {
+ sctp_hdr = (struct sctp_hdr *)((char *)l3_hdr + info->l3_len);
+ sctp_hdr->cksum = 0;
+ ol_flags |= PKT_TX_SCTP_CKSUM;
+ }
+
+ return ol_flags;
+}
+
+int
+decapsulation(struct rte_mbuf *pkt)
+{
+ uint8_t l4_proto = 0;
+ uint16_t outer_header_len;
+ struct udp_hdr *udp_hdr;
+ union tunnel_offload_info info = { .data = 0 };
+ struct ether_hdr *phdr = rte_pktmbuf_mtod(pkt, struct ether_hdr *);
+
+ parse_ethernet(phdr, &info, &l4_proto);
+
+ if (l4_proto != IPPROTO_UDP)
+ return -1;
+
+ udp_hdr = (struct udp_hdr *)((char *)phdr +
+ info.outer_l2_len + info.outer_l3_len);
+
+ /* Check the UDP destination port: 4789 is the default VXLAN port
+ * (RFC 7348). Alternatively, accept packets whose RX tunnel packet
+ * type has been set by the NIC (currently i40e only). */
+ if (udp_hdr->dst_port != rte_cpu_to_be_16(DEFAULT_VXLAN_PORT) &&
+ (pkt->packet_type & RTE_PTYPE_TUNNEL_MASK) == 0)
+ return -1;
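+ /* Strip the outer Ethernet, IP, UDP and VXLAN headers so the inner
+ * frame starts at the beginning of the mbuf data. */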
+ outer_header_len = info.outer_l2_len + info.outer_l3_len
+ + sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr);
+
+ rte_pktmbuf_adj(pkt, outer_header_len);
+
+ return 0;
+}
+
+void
+encapsulation(struct rte_mbuf *m, uint8_t queue_id)
+{
+ uint vport_id;
+ uint64_t ol_flags = 0;
+ uint32_t old_len = m->pkt_len, hash;
+ union tunnel_offload_info tx_offload = { .data = 0 };
+ struct ether_hdr *phdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* Allocate space for new Ethernet, IPv4, UDP and VXLAN headers */
+ struct ether_hdr *pneth = (struct ether_hdr *) rte_pktmbuf_prepend(m,
+ sizeof(struct ether_hdr) + sizeof(struct ipv4_hdr)
+ + sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr));
+
+ struct ipv4_hdr *ip = (struct ipv4_hdr *) &pneth[1];
+ struct udp_hdr *udp = (struct udp_hdr *) &ip[1];
+ struct vxlan_hdr *vxlan = (struct vxlan_hdr *) &udp[1];
+
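+ /* rte_pktmbuf_prepend() returned the new start of the frame, so pneth
+ * points at the outer Ethernet header, with the outer IPv4, UDP and
+ * VXLAN headers following it contiguously. */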
+ /* convert TX queue ID to vport ID */
+ vport_id = queue_id - 1;
+
+ /* replace original Ethernet header with ours */
+ pneth = rte_memcpy(pneth, &app_l2_hdr[vport_id],
+ sizeof(struct ether_hdr));
+
+ /* copy in IP header */
+ ip = rte_memcpy(ip, &app_ip_hdr[vport_id],
+ sizeof(struct ipv4_hdr));
+ ip->total_length = rte_cpu_to_be_16(m->data_len
+ - sizeof(struct ether_hdr));
+
+ /* outer IP checksum */
+ ol_flags |= PKT_TX_OUTER_IP_CKSUM;
+ ip->hdr_checksum = 0;
+
+ /* inner IP checksum offload */
+ if (tx_checksum) {
+ ol_flags |= process_inner_cksums(phdr, &tx_offload);
+ m->l2_len = tx_offload.l2_len;
+ m->l3_len = tx_offload.l3_len;
+ m->l4_len = tx_offload.l4_len;
+ m->l2_len += ETHER_VXLAN_HLEN;
+ }
+
+ m->outer_l2_len = sizeof(struct ether_hdr);
+ m->outer_l3_len = sizeof(struct ipv4_hdr);
+
+ m->ol_flags |= ol_flags;
+ m->tso_segsz = tx_offload.tso_segsz;
+
+ /* VXLAN header: the 24-bit VNI occupies the upper bits of vx_vni, hence the shift by 8 */
+ vxlan->vx_flags = rte_cpu_to_be_32(VXLAN_HF_VNI);
+ vxlan->vx_vni = rte_cpu_to_be_32(vxdev.out_key << 8);
+
+ /* UDP header */
+ udp->dgram_cksum = 0;
+ udp->dgram_len = rte_cpu_to_be_16(old_len
+ + sizeof(struct udp_hdr)
+ + sizeof(struct vxlan_hdr));
+
+ udp->dst_port = rte_cpu_to_be_16(vxdev.dst_port);
+ hash = rte_hash_crc(phdr, 2 * ETHER_ADDR_LEN, phdr->ether_type);
+ udp->src_port = rte_cpu_to_be_16((((uint64_t) hash * PORT_RANGE) >> 32)
+ + PORT_MIN);
+
+ return;
+}
diff --git a/examples/tep_termination/vxlan.h b/examples/tep_termination/vxlan.h
new file mode 100644
index 00000000..4242e111
--- /dev/null
+++ b/examples/tep_termination/vxlan.h
@@ -0,0 +1,86 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VXLAN_H_
+#define _VXLAN_H_
+
+#include <rte_ether.h>
+#include <rte_ip.h>
+
+#define PORT_MIN 49152
+#define PORT_MAX 65535
+#define PORT_RANGE ((PORT_MAX - PORT_MIN) + 1)
+
+#define VXLAN_N_PORTS 2
+#define VXLAN_HF_VNI 0x08000000
+#define DEFAULT_VXLAN_PORT 4789
+
+extern struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
+extern struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
+extern uint8_t tx_checksum;
+extern uint16_t tso_segsz;
+
+struct vxlan_port {
+ uint32_t vport_id; /**< VirtIO port id */
+ uint32_t peer_ip; /**< remote VTEP IP address */
+ struct ether_addr peer_mac; /**< remote VTEP MAC address */
+ struct ether_addr vport_mac; /**< VirtIO port MAC address */
+} __rte_cache_aligned;
+
+struct vxlan_conf {
+ uint16_t dst_port; /**< VXLAN UDP destination port */
+ uint32_t port_ip; /**< DPDK port IP address*/
+ uint32_t in_key; /**< VLAN ID */
+ uint32_t out_key; /**< VXLAN VNI */
+ struct vxlan_port port[VXLAN_N_PORTS]; /**< VXLAN configuration */
+} __rte_cache_aligned;
+
+extern struct vxlan_conf vxdev;
+
+/* structure that caches offload info for the current packet */
+union tunnel_offload_info {
+ uint64_t data;
+ struct {
+ uint64_t l2_len:7; /**< L2 (MAC) Header Length. */
+ uint64_t l3_len:9; /**< L3 (IP) Header Length. */
+ uint64_t l4_len:8; /**< L4 Header Length. */
+ uint64_t tso_segsz:16; /**< TCP TSO segment size */
+ uint64_t outer_l2_len:7; /**< outer L2 Header Length */
+ uint64_t outer_l3_len:16; /**< outer L3 Header Length */
+ };
+} __rte_cache_aligned;
+
+int decapsulation(struct rte_mbuf *pkt);
+void encapsulation(struct rte_mbuf *m, uint8_t queue_id);
+
+#endif /* _VXLAN_H_ */
diff --git a/examples/tep_termination/vxlan_setup.c b/examples/tep_termination/vxlan_setup.c
new file mode 100644
index 00000000..2a48e142
--- /dev/null
+++ b/examples/tep_termination/vxlan_setup.c
@@ -0,0 +1,457 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <getopt.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_mbuf.h>
+#include <rte_malloc.h>
+#include <rte_ip.h>
+#include <rte_udp.h>
+#include <rte_tcp.h>
+
+#include "main.h"
+#include "rte_virtio_net.h"
+#include "vxlan.h"
+#include "vxlan_setup.h"
+
+#define IPV4_HEADER_LEN 20
+#define UDP_HEADER_LEN 8
+#define VXLAN_HEADER_LEN 8
+
+#define IP_VERSION 0x40
+#define IP_HDRLEN 0x05 /* default IP header length == five 32-bit words. */
+#define IP_DEFTTL 64 /* from RFC 1340. */
+#define IP_VHL_DEF (IP_VERSION | IP_HDRLEN)
+
+#define IP_DN_FRAGMENT_FLAG 0x0040
+
+/* Used to compare MAC addresses. */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+/* Default inner VLAN ID */
+#define INNER_VLAN_ID 100
+
+/* VXLAN device */
+struct vxlan_conf vxdev;
+
+struct ipv4_hdr app_ip_hdr[VXLAN_N_PORTS];
+struct ether_hdr app_l2_hdr[VXLAN_N_PORTS];
+
+/* local VTEP IP address */
+uint8_t vxlan_multicast_ips[2][4] = { {239, 1, 1, 1 }, {239, 1, 2, 1 } };
+
+/* Remote VTEP IP address */
+uint8_t vxlan_overlay_ips[2][4] = { {192, 168, 10, 1}, {192, 168, 30, 1} };
+
+/* Remote VTEP MAC address */
+uint8_t peer_mac[6] = {0x00, 0x11, 0x01, 0x00, 0x00, 0x01};
+
+/* VXLAN RX filter type */
+uint8_t tep_filter_type[] = {RTE_TUNNEL_FILTER_IMAC_TENID,
+ RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID,
+ RTE_TUNNEL_FILTER_OMAC_TENID_IMAC,};
+
+/* Options for configuring ethernet port */
+static const struct rte_eth_conf port_conf = {
+ .rxmode = {
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripping by hardware disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+};
+
+/**
+ * One or two devices that belong to the same tenant ID can be
+ * assigned to a VM. The table is indexed by the device RX queue
+ * (device_fh), so consecutive device pairs share a tenant ID/VNI.
+ */
+const uint16_t tenant_id_conf[] = {
+ 1000, 1000, 1001, 1001, 1002, 1002, 1003, 1003,
+ 1004, 1004, 1005, 1005, 1006, 1006, 1007, 1007,
+ 1008, 1008, 1009, 1009, 1010, 1010, 1011, 1011,
+ 1012, 1012, 1013, 1013, 1014, 1014, 1015, 1015,
+ 1016, 1016, 1017, 1017, 1018, 1018, 1019, 1019,
+ 1020, 1020, 1021, 1021, 1022, 1022, 1023, 1023,
+ 1024, 1024, 1025, 1025, 1026, 1026, 1027, 1027,
+ 1028, 1028, 1029, 1029, 1030, 1030, 1031, 1031,
+};
+
+/**
+ * Initialises a given port using global settings, with the RX buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+int
+vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ int retval;
+ uint16_t q;
+ struct rte_eth_dev_info dev_info;
+ uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
+ const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
+ const uint16_t tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
+ struct rte_eth_udp_tunnel tunnel_udp;
+ struct rte_eth_rxconf *rxconf;
+ struct rte_eth_txconf *txconf;
+ struct vxlan_conf *pconf = &vxdev;
+
+ pconf->dst_port = udp_port;
+
+ rte_eth_dev_info_get(port, &dev_info);
+
+ if (dev_info.max_rx_queues > MAX_QUEUES) {
+ rte_exit(EXIT_FAILURE,
+ "please define MAX_QUEUES no less than %u in %s\n",
+ dev_info.max_rx_queues, __FILE__);
+ }
+
+ rxconf = &dev_info.default_rxconf;
+ txconf = &dev_info.default_txconf;
+ txconf->txq_flags = 0;
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
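+ /* One RX queue per virtio device: the tunnel filter added in
+ * vxlan_link() steers each VNI/inner MAC to its device's queue. */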
+ rx_rings = nb_devices;
+
+ /* Configure ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Setup the queues. */
+ for (q = 0; q < rx_rings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
+ rte_eth_dev_socket_id(port),
+ rxconf,
+ mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+ for (q = 0; q < tx_rings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
+ rte_eth_dev_socket_id(port),
+ txconf);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the device. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ /* Configure UDP port for UDP tunneling */
+ tunnel_udp.udp_port = udp_port;
+ tunnel_udp.prot_type = RTE_TUNNEL_TYPE_VXLAN;
+ retval = rte_eth_dev_udp_tunnel_port_add(port, &tunnel_udp);
+ if (retval < 0)
+ return retval;
+ rte_eth_macaddr_get(port, &ports_eth_addr[port]);
+ RTE_LOG(INFO, PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ ports_eth_addr[port].addr_bytes[0],
+ ports_eth_addr[port].addr_bytes[1],
+ ports_eth_addr[port].addr_bytes[2],
+ ports_eth_addr[port].addr_bytes[3],
+ ports_eth_addr[port].addr_bytes[4],
+ ports_eth_addr[port].addr_bytes[5]);
+
+ if (tso_segsz != 0) {
+ struct rte_eth_dev_info dev_info;
+ rte_eth_dev_info_get(port, &dev_info);
+ if ((dev_info.tx_offload_capa & DEV_TX_OFFLOAD_TCP_TSO) == 0)
+ RTE_LOG(WARNING, PORT,
+ "hardware TSO offload is not supported\n");
+ }
+ return 0;
+}
+
+static int
+vxlan_rx_process(struct rte_mbuf *pkt)
+{
+ int ret = 0;
+
+ if (rx_decap)
+ ret = decapsulation(pkt);
+
+ return ret;
+}
+
+static void
+vxlan_tx_process(uint8_t queue_id, struct rte_mbuf *pkt)
+{
+ if (tx_encap)
+ encapsulation(pkt, queue_id);
+
+ return;
+}
+
+/*
+ * This function learns the MAC address of the device and sets the
+ * initial L2 and L3 header info.
+ */
+int
+vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m)
+{
+ int i, ret;
+ struct ether_hdr *pkt_hdr;
+ struct virtio_net *dev = vdev->dev;
+ uint64_t portid = dev->device_fh;
+ struct ipv4_hdr *ip;
+
+ struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
+
+ if (unlikely(portid > VXLAN_N_PORTS)) {
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%"PRIu64") WARNING: Not configuring device,"
+ "as already have %d ports for VXLAN.",
+ dev->device_fh, VXLAN_N_PORTS);
+ return -1;
+ }
+
+ /* Learn MAC address of guest device from packet */
+ pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ if (is_same_ether_addr(&(pkt_hdr->s_addr), &vdev->mac_address)) {
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%"PRIu64") WARNING: This device is using an existing"
+ " MAC address and has not been registered.\n",
+ dev->device_fh);
+ return -1;
+ }
+
+ for (i = 0; i < ETHER_ADDR_LEN; i++) {
+ vdev->mac_address.addr_bytes[i] =
+ vxdev.port[portid].vport_mac.addr_bytes[i] =
+ pkt_hdr->s_addr.addr_bytes[i];
+ vxdev.port[portid].peer_mac.addr_bytes[i] = peer_mac[i];
+ }
+
+ memset(&tunnel_filter_conf, 0,
+ sizeof(struct rte_eth_tunnel_filter_conf));
+
+ ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac);
+ tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
+
+ /* inner MAC */
+ ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac);
+
+ tunnel_filter_conf.queue_id = vdev->rx_q;
+ tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
+
+ if (tep_filter_type[filter_idx] == RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
+ tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
+
+ tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
+
+ ret = rte_eth_dev_filter_ctrl(ports[0],
+ RTE_ETH_FILTER_TUNNEL,
+ RTE_ETH_FILTER_ADD,
+ &tunnel_filter_conf);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "%d Failed to add device MAC address to cloud filter\n",
+ vdev->rx_q);
+ return -1;
+ }
+
+ /* Print out inner MAC and VNI info. */
+ RTE_LOG(INFO, VHOST_DATA,
+ "(%d) MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VNI %d registered\n",
+ vdev->rx_q,
+ vdev->mac_address.addr_bytes[0],
+ vdev->mac_address.addr_bytes[1],
+ vdev->mac_address.addr_bytes[2],
+ vdev->mac_address.addr_bytes[3],
+ vdev->mac_address.addr_bytes[4],
+ vdev->mac_address.addr_bytes[5],
+ tenant_id_conf[vdev->rx_q]);
+
+ vxdev.port[portid].vport_id = portid;
+
+ for (i = 0; i < 4; i++) {
+ /* Local VTEP IP */
+ vxdev.port_ip |= vxlan_multicast_ips[portid][i] << (8 * i);
+ /* Remote VTEP IP */
+ vxdev.port[portid].peer_ip |=
+ vxlan_overlay_ips[portid][i] << (8 * i);
+ }
+
+ vxdev.out_key = tenant_id_conf[vdev->rx_q];
+ ether_addr_copy(&vxdev.port[portid].peer_mac,
+ &app_l2_hdr[portid].d_addr);
+ ether_addr_copy(&ports_eth_addr[0],
+ &app_l2_hdr[portid].s_addr);
+ app_l2_hdr[portid].ether_type = rte_cpu_to_be_16(ETHER_TYPE_IPv4);
+
+ ip = &app_ip_hdr[portid];
+ ip->version_ihl = IP_VHL_DEF;
+ ip->type_of_service = 0;
+ ip->total_length = 0;
+ ip->packet_id = 0;
+ ip->fragment_offset = IP_DN_FRAGMENT_FLAG;
+ ip->time_to_live = IP_DEFTTL;
+ ip->next_proto_id = IPPROTO_UDP;
+ ip->hdr_checksum = 0;
+ ip->src_addr = vxdev.port_ip;
+ ip->dst_addr = vxdev.port[portid].peer_ip;
+
+ /* Set device as ready for RX. */
+ vdev->ready = DEVICE_RX;
+
+ return 0;
+}
+
+/**
+ * Removes cloud filter. Ensures that nothing is adding buffers to the RX
+ * queue before disabling RX on the device.
+ */
+void
+vxlan_unlink(struct vhost_dev *vdev)
+{
+ unsigned i = 0, rx_count;
+ int ret;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct rte_eth_tunnel_filter_conf tunnel_filter_conf;
+
+ if (vdev->ready == DEVICE_RX) {
+ memset(&tunnel_filter_conf, 0,
+ sizeof(struct rte_eth_tunnel_filter_conf));
+
+ ether_addr_copy(&ports_eth_addr[0], &tunnel_filter_conf.outer_mac);
+ ether_addr_copy(&vdev->mac_address, &tunnel_filter_conf.inner_mac);
+ tunnel_filter_conf.tenant_id = tenant_id_conf[vdev->rx_q];
+ tunnel_filter_conf.filter_type = tep_filter_type[filter_idx];
+
+ if (tep_filter_type[filter_idx] ==
+ RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID)
+ tunnel_filter_conf.inner_vlan = INNER_VLAN_ID;
+
+ tunnel_filter_conf.queue_id = vdev->rx_q;
+ tunnel_filter_conf.tunnel_type = RTE_TUNNEL_TYPE_VXLAN;
+
+ ret = rte_eth_dev_filter_ctrl(ports[0],
+ RTE_ETH_FILTER_TUNNEL,
+ RTE_ETH_FILTER_DELETE,
+ &tunnel_filter_conf);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "%d Failed to add device MAC address to cloud filter\n",
+ vdev->rx_q);
+ return;
+ }
+ for (i = 0; i < ETHER_ADDR_LEN; i++)
+ vdev->mac_address.addr_bytes[i] = 0;
+
+ /* Clear out the receive buffers */
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)vdev->rx_q,
+ pkts_burst, MAX_PKT_BURST);
+
+ while (rx_count) {
+ for (i = 0; i < rx_count; i++)
+ rte_pktmbuf_free(pkts_burst[i]);
+
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)vdev->rx_q,
+ pkts_burst, MAX_PKT_BURST);
+ }
+ vdev->ready = DEVICE_MAC_LEARNING;
+ }
+}
+
+/* Transmit packets after encapsulating */
+int
+vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts) {
+ int ret = 0;
+ uint16_t i;
+
+ for (i = 0; i < nb_pkts; i++)
+ vxlan_tx_process(queue_id, tx_pkts[i]);
+
+ ret = rte_eth_tx_burst(port_id, queue_id, tx_pkts, nb_pkts);
+
+ return ret;
+}
+
+/* Check for decapsulation and pass packets directly to VIRTIO device */
+int
+vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts_burst,
+ uint32_t rx_count)
+{
+ uint32_t i = 0;
+ uint32_t count = 0;
+ int ret;
+ struct rte_mbuf *pkts_valid[rx_count];
+
+ for (i = 0; i < rx_count; i++) {
+ if (enable_stats) {
+ rte_atomic64_add(
+ &dev_statistics[dev->device_fh].rx_bad_ip_csum,
+ (pkts_burst[i]->ol_flags & PKT_RX_IP_CKSUM_BAD)
+ != 0);
+ rte_atomic64_add(
+ &dev_statistics[dev->device_fh].rx_bad_ip_csum,
+ (pkts_burst[i]->ol_flags & PKT_RX_L4_CKSUM_BAD)
+ != 0);
+ }
+ ret = vxlan_rx_process(pkts_burst[i]);
+ if (unlikely(ret < 0))
+ continue;
+
+ pkts_valid[count] = pkts_burst[i];
+ count++;
+ }
+
+ ret = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_valid, count);
+ return ret;
+}
diff --git a/examples/tep_termination/vxlan_setup.h b/examples/tep_termination/vxlan_setup.h
new file mode 100644
index 00000000..1846540f
--- /dev/null
+++ b/examples/tep_termination/vxlan_setup.h
@@ -0,0 +1,87 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VXLAN_SETUP_H_
+#define VXLAN_SETUP_H_
+
+extern uint16_t nb_devices;
+extern uint16_t udp_port;
+extern uint8_t filter_idx;
+extern uint8_t ports[RTE_MAX_ETHPORTS];
+extern struct ether_addr ports_eth_addr[RTE_MAX_ETHPORTS];
+extern uint32_t enable_stats;
+extern struct device_statistics dev_statistics[MAX_DEVICES];
+extern uint8_t rx_decap;
+extern uint8_t tx_encap;
+
+typedef int (*ol_port_configure_t)(uint8_t port,
+ struct rte_mempool *mbuf_pool);
+
+typedef int (*ol_tunnel_setup_t)(struct vhost_dev *vdev,
+ struct rte_mbuf *m);
+
+typedef void (*ol_tunnel_destroy_t)(struct vhost_dev *vdev);
+
+typedef int (*ol_tx_handle_t)(uint8_t port_id, uint16_t queue_id,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+
+typedef int (*ol_rx_handle_t)(struct virtio_net *dev, struct rte_mbuf **pkts,
+ uint32_t count);
+
+typedef int (*ol_param_handle)(struct virtio_net *dev);
+
+struct ol_switch_ops {
+ ol_port_configure_t port_configure;
+ ol_tunnel_setup_t tunnel_setup;
+ ol_tunnel_destroy_t tunnel_destroy;
+ ol_tx_handle_t tx_handle;
+ ol_rx_handle_t rx_handle;
+ ol_param_handle param_handle;
+};
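+
+/*
+ * For illustration only (a sketch; the actual table lives in main.c of this
+ * example): the front end is expected to wire these callbacks to the VXLAN
+ * handlers declared below, roughly as:
+ *
+ *	static struct ol_switch_ops overlay_options = {
+ *		.port_configure = vxlan_port_init,
+ *		.tunnel_setup   = vxlan_link,
+ *		.tunnel_destroy = vxlan_unlink,
+ *		.tx_handle      = vxlan_tx_pkts,
+ *		.rx_handle      = vxlan_rx_pkts,
+ *		.param_handle   = NULL,
+ *	};
+ */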
+
+int
+vxlan_port_init(uint8_t port, struct rte_mempool *mbuf_pool);
+
+int
+vxlan_link(struct vhost_dev *vdev, struct rte_mbuf *m);
+
+void
+vxlan_unlink(struct vhost_dev *vdev);
+
+int
+vxlan_tx_pkts(uint8_t port_id, uint16_t queue_id,
+ struct rte_mbuf **tx_pkts, uint16_t nb_pkts);
+int
+vxlan_rx_pkts(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count);
+
+#endif /* VXLAN_SETUP_H_ */
diff --git a/examples/timer/Makefile b/examples/timer/Makefile
new file mode 100644
index 00000000..af12b7ba
--- /dev/null
+++ b/examples/timer/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = timer
+
+# all source are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/timer/main.c b/examples/timer/main.c
new file mode 100644
index 00000000..37ad559e
--- /dev/null
+++ b/examples/timer/main.c
@@ -0,0 +1,151 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_memzone.h>
+#include <rte_launch.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_cycles.h>
+#include <rte_timer.h>
+#include <rte_debug.h>
+
+#define TIMER_RESOLUTION_CYCLES 20000000ULL /* around 10ms at 2 GHz */
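+/* For example: 20,000,000 cycles / 2,000,000,000 cycles per second = 10 ms
+ * between calls to rte_timer_manage(). */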
+
+static struct rte_timer timer0;
+static struct rte_timer timer1;
+
+/* timer0 callback */
+static void
+timer0_cb(__attribute__((unused)) struct rte_timer *tim,
+ __attribute__((unused)) void *arg)
+{
+ static unsigned counter = 0;
+ unsigned lcore_id = rte_lcore_id();
+
+ printf("%s() on lcore %u\n", __func__, lcore_id);
+
+ /* this timer is automatically reloaded until we decide to
+ * stop it, when counter reaches 20. */
+	if ((counter++) == 20)
+ rte_timer_stop(tim);
+}
+
+/* timer1 callback */
+static void
+timer1_cb(__attribute__((unused)) struct rte_timer *tim,
+ __attribute__((unused)) void *arg)
+{
+ unsigned lcore_id = rte_lcore_id();
+ uint64_t hz;
+
+ printf("%s() on lcore %u\n", __func__, lcore_id);
+
+ /* reload it on another lcore */
+ hz = rte_get_timer_hz();
+ lcore_id = rte_get_next_lcore(lcore_id, 0, 1);
+ rte_timer_reset(tim, hz/3, SINGLE, lcore_id, timer1_cb, NULL);
+}
+
+static __attribute__((noreturn)) int
+lcore_mainloop(__attribute__((unused)) void *arg)
+{
+ uint64_t prev_tsc = 0, cur_tsc, diff_tsc;
+ unsigned lcore_id;
+
+ lcore_id = rte_lcore_id();
+ printf("Starting mainloop on core %u\n", lcore_id);
+
+ while (1) {
+ /*
+		 * Call the timer handler on each core: since we do not need a
+		 * very precise timer, call rte_timer_manage() only every ~10ms
+		 * (at 2 GHz). In a real application this improves performance,
+		 * as reading the HPET timer is not efficient.
+ */
+ cur_tsc = rte_rdtsc();
+ diff_tsc = cur_tsc - prev_tsc;
+ if (diff_tsc > TIMER_RESOLUTION_CYCLES) {
+ rte_timer_manage();
+ prev_tsc = cur_tsc;
+ }
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ uint64_t hz;
+ unsigned lcore_id;
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ /* init RTE timer library */
+ rte_timer_subsystem_init();
+
+ /* init timer structures */
+ rte_timer_init(&timer0);
+ rte_timer_init(&timer1);
+
+ /* load timer0, every second, on master lcore, reloaded automatically */
+ hz = rte_get_timer_hz();
+ lcore_id = rte_lcore_id();
+ rte_timer_reset(&timer0, hz, PERIODICAL, lcore_id, timer0_cb, NULL);
+
+ /* load timer1, every second/3, on next lcore, reloaded manually */
+ lcore_id = rte_get_next_lcore(lcore_id, 0, 1);
+ rte_timer_reset(&timer1, hz/3, SINGLE, lcore_id, timer1_cb, NULL);
+
+ /* call lcore_mainloop() on every slave lcore */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(lcore_mainloop, NULL, lcore_id);
+ }
+
+ /* call it on master lcore too */
+ (void) lcore_mainloop(NULL);
+
+ return 0;
+}
diff --git a/examples/vhost/Makefile b/examples/vhost/Makefile
new file mode 100644
index 00000000..e95c68ae
--- /dev/null
+++ b/examples/vhost/Makefile
@@ -0,0 +1,59 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+ifneq ($(CONFIG_RTE_EXEC_ENV),"linuxapp")
+$(info This application can only operate in a linuxapp environment, \
+please change the definition of the RTE_TARGET environment variable)
+all:
+else
+
+# binary name
+APP = vhost-switch
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += -O2 -D_FILE_OFFSET_BITS=64
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif
diff --git a/examples/vhost/main.c b/examples/vhost/main.c
new file mode 100644
index 00000000..28c17afd
--- /dev/null
+++ b/examples/vhost/main.c
@@ -0,0 +1,3157 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arpa/inet.h>
+#include <getopt.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/eventfd.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+#include <rte_malloc.h>
+#include <rte_virtio_net.h>
+#include <rte_ip.h>
+#include <rte_tcp.h>
+
+#include "main.h"
+
+#ifndef MAX_QUEUES
+#define MAX_QUEUES 128
+#endif
+
+/* the maximum number of external ports supported */
+#define MAX_SUP_PORTS 1
+
+/*
+ * Calculate the number of buffers needed per port
+ */
+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \
+ (num_switching_cores*MAX_PKT_BURST) + \
+ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
+ ((num_switching_cores+1)*MBUF_CACHE_SIZE))
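+/*
+ * Worked example with assumed values (2 switching cores, the defaults defined
+ * in this file otherwise): 128*1024 + 2*32 + 2*512 + 3*128 = 132544 mbufs
+ * for the port.
+ */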
+
+#define MBUF_CACHE_SIZE 128
+#define MBUF_DATA_SIZE RTE_MBUF_DEFAULT_BUF_SIZE
+
+/*
+ * No frame data buffers allocated by the host are required for the zero copy
+ * implementation; the guest allocates the frame data buffers and vhost uses
+ * them directly.
+ */
+#define VIRTIO_DESCRIPTOR_LEN_ZCP RTE_MBUF_DEFAULT_DATAROOM
+#define MBUF_DATA_SIZE_ZCP RTE_MBUF_DEFAULT_BUF_SIZE
+#define MBUF_CACHE_SIZE_ZCP 0
+
+#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+#define BURST_RX_WAIT_US 15 /* Defines how long we wait between retries on RX */
+#define BURST_RX_RETRIES 4 /* Number of retries on RX. */
+
+#define JUMBO_FRAME_MAX_SIZE 0x2600
+
+/* State of virtio device. */
+#define DEVICE_MAC_LEARNING 0
+#define DEVICE_RX 1
+#define DEVICE_SAFE_REMOVE 2
+
+/* Config_core_flag status definitions. */
+#define REQUEST_DEV_REMOVAL 1
+#define ACK_DEV_REMOVAL 0
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 1024
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+/*
+ * These two macros need refining for the legacy and DPDK-based front ends:
+ * take the max vring avail descriptors/entries from the guest minus
+ * MAX_PKT_BURST, then adjust to a power of 2.
+ */
+/*
+ * For the legacy front end: 128 descriptors,
+ * half for the virtio header, the other half for the mbuf.
+ */
+#define RTE_TEST_RX_DESC_DEFAULT_ZCP 32 /* legacy: 32, DPDK virt FE: 128. */
+#define RTE_TEST_TX_DESC_DEFAULT_ZCP 64 /* legacy: 64, DPDK virt FE: 64. */
+
+/* Get first 4 bytes in mbuf headroom. */
+#define MBUF_HEADROOM_UINT32(mbuf) (*(uint32_t *)((uint8_t *)(mbuf) \
+ + sizeof(struct rte_mbuf)))
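+/* (The zero copy path below uses this slot to stash the vring descriptor
+ * index of the attached guest buffer; see attach_rxmbuf_zcp() and
+ * txmbuf_clean_zcp().) */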
+
+/* true if x is a power of 2 */
+#define POWEROF2(x) ((((x)-1) & (x)) == 0)
+
+#define INVALID_PORT_ID 0xFF
+
+/* Max number of devices. Limited by vmdq. */
+#define MAX_DEVICES 64
+
+/* Size of buffers used for snprintfs. */
+#define MAX_PRINT_BUFF 6072
+
+/* Maximum character device basename size. */
+#define MAX_BASENAME_SZ 10
+
+/* Maximum long option length for option parsing. */
+#define MAX_LONG_OPT_SZ 64
+
+/* Used to compare MAC addresses. */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFFULL
+
+/* Number of descriptors per cacheline. */
+#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc))
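+/* e.g. a 64-byte cache line / 16-byte struct vring_desc = 4 (typical x86 values, for illustration only) */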
+
+#define MBUF_EXT_MEM(mb) (rte_mbuf_from_indirect(mb) != (mb))
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+
+/* Promiscuous mode */
+static uint32_t promiscuous;
+
+/*Number of switching cores enabled*/
+static uint32_t num_switching_cores = 0;
+
+/* number of devices/queues to support*/
+static uint32_t num_queues = 0;
+static uint32_t num_devices;
+
+/*
+ * Enable zero copy: packet buffers are DMA'd directly to the HW descriptor.
+ * Disabled by default.
+ */
+static uint32_t zero_copy;
+static int mergeable;
+
+/* Do VLAN strip on the host, enabled by default */
+static uint32_t vlan_strip = 1;
+
+/* number of descriptors to apply*/
+static uint32_t num_rx_descriptor = RTE_TEST_RX_DESC_DEFAULT_ZCP;
+static uint32_t num_tx_descriptor = RTE_TEST_TX_DESC_DEFAULT_ZCP;
+
+/* Max ring descriptors; ixgbe, i40e and e1000 all support 4096. */
+#define MAX_RING_DESC 4096
+
+struct vpool {
+ struct rte_mempool *pool;
+ struct rte_ring *ring;
+ uint32_t buf_size;
+} vpool_array[MAX_QUEUES+MAX_QUEUES];
+
+/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
+typedef enum {
+ VM2VM_DISABLED = 0,
+ VM2VM_SOFTWARE = 1,
+ VM2VM_HARDWARE = 2,
+ VM2VM_LAST
+} vm2vm_type;
+static vm2vm_type vm2vm_mode = VM2VM_SOFTWARE;
+
+/* The type of host physical address translated from guest physical address. */
+typedef enum {
+ PHYS_ADDR_CONTINUOUS = 0,
+ PHYS_ADDR_CROSS_SUBREG = 1,
+ PHYS_ADDR_INVALID = 2,
+ PHYS_ADDR_LAST
+} hpa_type;
+
+/* Enable stats. */
+static uint32_t enable_stats = 0;
+/* Enable retries on RX. */
+static uint32_t enable_retry = 1;
+
+/* Disable TX checksum offload */
+static uint32_t enable_tx_csum;
+
+/* Disable TSO offload */
+static uint32_t enable_tso;
+
+/* Specify timeout (in microseconds) between retries on RX. */
+static uint32_t burst_rx_delay_time = BURST_RX_WAIT_US;
+/* Specify the number of retries on RX. */
+static uint32_t burst_rx_retry_num = BURST_RX_RETRIES;
+
+/* Character device basename. Can be set by user. */
+static char dev_basename[MAX_BASENAME_SZ] = "vhost-net";
+
+/* empty vmdq configuration structure. Filled in programmatically */
+static struct rte_eth_conf vmdq_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ /*
+		 * This is necessary for 1G NICs such as the I350;
+		 * it fixes a bug where IPv4 forwarding in the guest cannot
+		 * forward packets from one virtio dev to another virtio dev.
+ */
+ .hw_vlan_strip = 1, /**< VLAN strip enabled. */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .rx_adv_conf = {
+ /*
+ * should be overridden separately in code with
+ * appropriate values
+ */
+ .vmdq_rx_conf = {
+ .nb_queue_pools = ETH_8_POOLS,
+ .enable_default_pool = 0,
+ .default_pool = 0,
+ .nb_pool_maps = 0,
+ .pool_map = {{0, 0},},
+ },
+ },
+};
+
+static unsigned lcore_ids[RTE_MAX_LCORE];
+static uint8_t ports[RTE_MAX_ETHPORTS];
+static unsigned num_ports = 0; /**< The number of ports specified in command line */
+static uint16_t num_pf_queues, num_vmdq_queues;
+static uint16_t vmdq_pool_base, vmdq_queue_base;
+static uint16_t queues_per_pool;
+
+static const uint16_t external_pkt_default_vlan_tag = 2000;
+const uint16_t vlan_tags[] = {
+ 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
+ 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
+ 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+ 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
+ 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+};
+
+/* ethernet addresses of ports */
+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* heads for the main used and free linked lists for the data path. */
+static struct virtio_net_data_ll *ll_root_used = NULL;
+static struct virtio_net_data_ll *ll_root_free = NULL;
+
+/* Array of data core structures containing information on individual core linked lists. */
+static struct lcore_info lcore_info[RTE_MAX_LCORE];
+
+/* Used for queueing bursts of TX packets. */
+struct mbuf_table {
+ unsigned len;
+ unsigned txq_id;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+/* TX queue for each data core. */
+struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
+
+/* TX queue for each virtio device for zero copy. */
+struct mbuf_table tx_queue_zcp[MAX_QUEUES];
+
+/* VLAN header struct used to insert VLAN tags on TX. */
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+/* Header lengths. */
+#define VLAN_HLEN 4
+#define VLAN_ETH_HLEN 18
+
+/* Per-device statistics struct */
+struct device_statistics {
+ uint64_t tx_total;
+ rte_atomic64_t rx_total_atomic;
+ uint64_t rx_total;
+ uint64_t tx;
+ rte_atomic64_t rx_atomic;
+ uint64_t rx;
+} __rte_cache_aligned;
+struct device_statistics dev_statistics[MAX_DEVICES];
+
+/*
+ * Builds up the correct configuration for VMDQ VLAN pool map
+ * according to the pool & queue limits.
+ */
+static inline int
+get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
+{
+ struct rte_eth_vmdq_rx_conf conf;
+ struct rte_eth_vmdq_rx_conf *def_conf =
+ &vmdq_conf_default.rx_adv_conf.vmdq_rx_conf;
+ unsigned i;
+
+ memset(&conf, 0, sizeof(conf));
+ conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
+ conf.nb_pool_maps = num_devices;
+ conf.enable_loop_back = def_conf->enable_loop_back;
+ conf.rx_mode = def_conf->rx_mode;
+
+ for (i = 0; i < conf.nb_pool_maps; i++) {
+		conf.pool_map[i].vlan_id = vlan_tags[i];
+ conf.pool_map[i].pools = (1UL << i);
+ }
+
+ (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
+ sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
+ return 0;
+}
+
+/*
+ * Validate the device number against the max pool number obtained from
+ * dev_info. If the device number is invalid, print an error message and
+ * return -1. Each device must have its own pool.
+ */
+static inline int
+validate_num_devices(uint32_t max_nb_devices)
+{
+ if (num_devices > max_nb_devices) {
+ RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as a parameter.
+ */
+static inline int
+port_init(uint8_t port)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_conf port_conf;
+ struct rte_eth_rxconf *rxconf;
+ struct rte_eth_txconf *txconf;
+ int16_t rx_rings, tx_rings;
+ uint16_t rx_ring_size, tx_ring_size;
+ int retval;
+ uint16_t q;
+
+ /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
+ rte_eth_dev_info_get (port, &dev_info);
+
+ if (dev_info.max_rx_queues > MAX_QUEUES) {
+ rte_exit(EXIT_FAILURE,
+ "please define MAX_QUEUES no less than %u in %s\n",
+ dev_info.max_rx_queues, __FILE__);
+ }
+
+ rxconf = &dev_info.default_rxconf;
+ txconf = &dev_info.default_txconf;
+ rxconf->rx_drop_en = 1;
+
+ /* Enable vlan offload */
+ txconf->txq_flags &= ~ETH_TXQ_FLAGS_NOVLANOFFL;
+
+ /*
+ * Zero copy defers queue RX/TX start to the time when guest
+ * finishes its startup and packet buffers from that guest are
+ * available.
+ */
+ if (zero_copy) {
+ rxconf->rx_deferred_start = 1;
+ rxconf->rx_drop_en = 0;
+ txconf->tx_deferred_start = 1;
+ }
+
+ /*configure the number of supported virtio devices based on VMDQ limits */
+ num_devices = dev_info.max_vmdq_pools;
+
+ if (zero_copy) {
+ rx_ring_size = num_rx_descriptor;
+ tx_ring_size = num_tx_descriptor;
+ tx_rings = dev_info.max_tx_queues;
+ } else {
+ rx_ring_size = RTE_TEST_RX_DESC_DEFAULT;
+ tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
+ tx_rings = (uint16_t)rte_lcore_count();
+ }
+
+ retval = validate_num_devices(MAX_DEVICES);
+ if (retval < 0)
+ return retval;
+
+ /* Get port configuration. */
+ retval = get_eth_conf(&port_conf, num_devices);
+ if (retval < 0)
+ return retval;
+ /* NIC queues are divided into pf queues and vmdq queues. */
+ num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
+ queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+ num_vmdq_queues = num_devices * queues_per_pool;
+ num_queues = num_pf_queues + num_vmdq_queues;
+ vmdq_queue_base = dev_info.vmdq_queue_base;
+ vmdq_pool_base = dev_info.vmdq_pool_base;
+ printf("pf queue num: %u, configured vmdq pool num: %u, each vmdq pool has %u queues\n",
+ num_pf_queues, num_devices, queues_per_pool);
+
+	if (port >= rte_eth_dev_count())
+		return -1;
+
+ if (enable_tx_csum == 0)
+ rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_CSUM);
+
+ if (enable_tso == 0) {
+ rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO4);
+ rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_HOST_TSO6);
+ }
+
+ rx_rings = (uint16_t)dev_info.max_rx_queues;
+ /* Configure ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ /* Setup the queues. */
+ for (q = 0; q < rx_rings; q ++) {
+ retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
+ rte_eth_dev_socket_id(port),
+ rxconf,
+ vpool_array[q].pool);
+ if (retval < 0)
+ return retval;
+ }
+ for (q = 0; q < tx_rings; q ++) {
+ retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
+ rte_eth_dev_socket_id(port),
+ txconf);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the device. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0) {
+ RTE_LOG(ERR, VHOST_DATA, "Failed to start the device.\n");
+ return retval;
+ }
+
+ if (promiscuous)
+ rte_eth_promiscuous_enable(port);
+
+ rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+ RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
+ RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ vmdq_ports_eth_addr[port].addr_bytes[0],
+ vmdq_ports_eth_addr[port].addr_bytes[1],
+ vmdq_ports_eth_addr[port].addr_bytes[2],
+ vmdq_ports_eth_addr[port].addr_bytes[3],
+ vmdq_ports_eth_addr[port].addr_bytes[4],
+ vmdq_ports_eth_addr[port].addr_bytes[5]);
+
+ return 0;
+}
+
+/*
+ * Set character device basename.
+ */
+static int
+us_vhost_parse_basename(const char *q_arg)
+{
+	/* parse basename string */
+
+	if (strnlen(q_arg, MAX_BASENAME_SZ) >= MAX_BASENAME_SZ)
+ return -1;
+ else
+ snprintf((char*)&dev_basename, MAX_BASENAME_SZ, "%s", q_arg);
+
+ return 0;
+}
+
+/*
+ * Parse the portmask provided at run time.
+ */
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ errno = 0;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+
+}
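+
+/*
+ * Illustrative portmask values only: "-p 0x1" selects port 0; a mask that
+ * enables more than MAX_SUP_PORTS ports is rejected later in
+ * us_vhost_parse_args().
+ */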
+
+/*
+ * Parse num options at run time.
+ */
+static int
+parse_num_opt(const char *q_arg, uint32_t max_valid_value)
+{
+ char *end = NULL;
+ unsigned long num;
+
+ errno = 0;
+
+ /* parse unsigned int string */
+ num = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ if (num > max_valid_value)
+ return -1;
+
+ return num;
+
+}
+
+/*
+ * Display usage
+ */
+static void
+us_vhost_usage(const char *prgname)
+{
+ RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK\n"
+ " --vm2vm [0|1|2]\n"
+ " --rx_retry [0|1] --mergeable [0|1] --stats [0-N]\n"
+ " --dev-basename <name>\n"
+ " --nb-devices ND\n"
+ " -p PORTMASK: Set mask for ports to be used by application\n"
+ " --vm2vm [0|1|2]: disable/software(default)/hardware vm2vm comms\n"
+	" --rx-retry [0|1]: disable/enable(default) retries on rx. Enable retry if destination queue is full\n"
+	" --rx-retry-delay [0-N]: timeout (in microseconds) between retries on RX. Takes effect only if retries on rx are enabled\n"
+	" --rx-retry-num [0-N]: the number of retries on rx. Takes effect only if retries on rx are enabled\n"
+ " --mergeable [0|1]: disable(default)/enable RX mergeable buffers\n"
+ " --vlan-strip [0|1]: disable/enable(default) RX VLAN strip on host\n"
+ " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n"
+ " --dev-basename: The basename to be used for the character device.\n"
+ " --zero-copy [0|1]: disable(default)/enable rx/tx "
+ "zero copy\n"
+ " --rx-desc-num [0-N]: the number of descriptors on rx, "
+ "used only when zero copy is enabled.\n"
+ " --tx-desc-num [0-N]: the number of descriptors on tx, "
+ "used only when zero copy is enabled.\n"
+ " --tx-csum [0|1] disable/enable TX checksum offload.\n"
+ " --tso [0|1] disable/enable TCP segment offload.\n",
+ prgname);
+}
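+
+/*
+ * Illustrative invocation only (the EAL core/memory options depend on the
+ * target system and the binary path assumes the default build directory):
+ *
+ *   ./build/vhost-switch -c 0xf -n 4 -- -p 0x1 --dev-basename vhost-net --stats 2
+ */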
+
+/*
+ * Parse the arguments given in the command line of the application.
+ */
+static int
+us_vhost_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ int option_index;
+ unsigned i;
+ const char *prgname = argv[0];
+ static struct option long_option[] = {
+ {"vm2vm", required_argument, NULL, 0},
+ {"rx-retry", required_argument, NULL, 0},
+ {"rx-retry-delay", required_argument, NULL, 0},
+ {"rx-retry-num", required_argument, NULL, 0},
+ {"mergeable", required_argument, NULL, 0},
+ {"vlan-strip", required_argument, NULL, 0},
+ {"stats", required_argument, NULL, 0},
+ {"dev-basename", required_argument, NULL, 0},
+ {"zero-copy", required_argument, NULL, 0},
+ {"rx-desc-num", required_argument, NULL, 0},
+ {"tx-desc-num", required_argument, NULL, 0},
+ {"tx-csum", required_argument, NULL, 0},
+ {"tso", required_argument, NULL, 0},
+ {NULL, 0, 0, 0},
+ };
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:P",
+ long_option, &option_index)) != EOF) {
+ switch (opt) {
+ /* Portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ break;
+
+ case 'P':
+ promiscuous = 1;
+ vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.rx_mode =
+ ETH_VMDQ_ACCEPT_BROADCAST |
+ ETH_VMDQ_ACCEPT_MULTICAST;
+ rte_vhost_feature_enable(1ULL << VIRTIO_NET_F_CTRL_RX);
+
+ break;
+
+ case 0:
+ /* Enable/disable vm2vm comms. */
+ if (!strncmp(long_option[option_index].name, "vm2vm",
+ MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, (VM2VM_LAST - 1));
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for "
+ "vm2vm [0|1|2]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ vm2vm_mode = (vm2vm_type)ret;
+ }
+ }
+
+ /* Enable/disable retries on RX. */
+ if (!strncmp(long_option[option_index].name, "rx-retry", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ enable_retry = ret;
+ }
+ }
+
+ /* Enable/disable TX checksum offload. */
+ if (!strncmp(long_option[option_index].name, "tx-csum", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tx-csum [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else
+ enable_tx_csum = ret;
+ }
+
+ /* Enable/disable TSO offload. */
+ if (!strncmp(long_option[option_index].name, "tso", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for tso [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else
+ enable_tso = ret;
+ }
+
+			/* Specify the retry delay time (in microseconds) on RX. */
+ if (!strncmp(long_option[option_index].name, "rx-retry-delay", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-delay [0-N]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ burst_rx_delay_time = ret;
+ }
+ }
+
+ /* Specify the retries number on RX. */
+ if (!strncmp(long_option[option_index].name, "rx-retry-num", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for rx-retry-num [0-N]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ burst_rx_retry_num = ret;
+ }
+ }
+
+ /* Enable/disable RX mergeable buffers. */
+ if (!strncmp(long_option[option_index].name, "mergeable", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for mergeable [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ mergeable = !!ret;
+ if (ret) {
+ vmdq_conf_default.rxmode.jumbo_frame = 1;
+ vmdq_conf_default.rxmode.max_rx_pkt_len
+ = JUMBO_FRAME_MAX_SIZE;
+ }
+ }
+ }
+
+ /* Enable/disable RX VLAN strip on host. */
+ if (!strncmp(long_option[option_index].name,
+ "vlan-strip", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument for VLAN strip [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ vlan_strip = !!ret;
+ vmdq_conf_default.rxmode.hw_vlan_strip =
+ vlan_strip;
+ }
+ }
+
+ /* Enable/disable stats. */
+ if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ enable_stats = ret;
+ }
+ }
+
+ /* Set character device basename. */
+ if (!strncmp(long_option[option_index].name, "dev-basename", MAX_LONG_OPT_SZ)) {
+ if (us_vhost_parse_basename(optarg) == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for character device basename (Max %d characters)\n", MAX_BASENAME_SZ);
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ }
+
+ /* Enable/disable rx/tx zero copy. */
+ if (!strncmp(long_option[option_index].name,
+ "zero-copy", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+ "Invalid argument"
+ " for zero-copy [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else
+ zero_copy = ret;
+ }
+
+ /* Specify the descriptor number on RX. */
+ if (!strncmp(long_option[option_index].name,
+ "rx-desc-num", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, MAX_RING_DESC);
+ if ((ret == -1) || (!POWEROF2(ret))) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+					"Invalid argument for rx-desc-num [0-N], "
+ "power of 2 required.\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ num_rx_descriptor = ret;
+ }
+ }
+
+ /* Specify the descriptor number on TX. */
+ if (!strncmp(long_option[option_index].name,
+ "tx-desc-num", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, MAX_RING_DESC);
+ if ((ret == -1) || (!POWEROF2(ret))) {
+ RTE_LOG(INFO, VHOST_CONFIG,
+					"Invalid argument for tx-desc-num [0-N], "
+ "power of 2 required.\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ num_tx_descriptor = ret;
+ }
+ }
+
+ break;
+
+ /* Invalid option - print options. */
+ default:
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (enabled_port_mask & (1 << i))
+ ports[num_ports++] = (uint8_t)i;
+ }
+
+ if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
+		RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
+			"but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
+ return -1;
+ }
+
+ if ((zero_copy == 1) && (vm2vm_mode == VM2VM_SOFTWARE)) {
+ RTE_LOG(INFO, VHOST_PORT,
+			"Vhost zero copy doesn't support software vm2vm, "
+ "please specify 'vm2vm 2' to use hardware vm2vm.\n");
+ return -1;
+ }
+
+ if ((zero_copy == 1) && (vmdq_conf_default.rxmode.jumbo_frame == 1)) {
+ RTE_LOG(INFO, VHOST_PORT,
+			"Vhost zero copy doesn't support jumbo frame, "
+ "please specify '--mergeable 0' to disable the "
+ "mergeable feature.\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Update the global variable num_ports and the ports array according to the
+ * number of system ports, and return the number of valid ports.
+ */
+static unsigned check_ports_num(unsigned nb_ports)
+{
+ unsigned valid_num_ports = num_ports;
+ unsigned portid;
+
+ if (num_ports > nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
+ num_ports, nb_ports);
+ num_ports = nb_ports;
+ }
+
+ for (portid = 0; portid < num_ports; portid ++) {
+ if (ports[portid] >= nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
+ ports[portid], (nb_ports - 1));
+ ports[portid] = INVALID_PORT_ID;
+ valid_num_ports--;
+ }
+ }
+ return valid_num_ports;
+}
+
+/*
+ * Macro to print out packet contents. Wrapped in debug define so that the
+ * data path is not affected when debug is disabled.
+ */
+#ifdef DEBUG
+#define PRINT_PACKET(device, addr, size, header) do { \
+ char *pkt_addr = (char*)(addr); \
+ unsigned int index; \
+ char packet[MAX_PRINT_BUFF]; \
+ \
+ if ((header)) \
+ snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
+ else \
+ snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
+ for (index = 0; index < (size); index++) { \
+ snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \
+ "%02hhx ", pkt_addr[index]); \
+ } \
+ snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \
+ \
+ LOG_DEBUG(VHOST_DATA, "%s", packet); \
+} while(0)
+#else
+#define PRINT_PACKET(device, addr, size, header) do{} while(0)
+#endif
+
+/*
+ * Function to convert guest physical addresses to vhost physical addresses.
+ * This is used to convert virtio buffer addresses.
+ */
+static inline uint64_t __attribute__((always_inline))
+gpa_to_hpa(struct vhost_dev *vdev, uint64_t guest_pa,
+ uint32_t buf_len, hpa_type *addr_type)
+{
+ struct virtio_memory_regions_hpa *region;
+ uint32_t regionidx;
+ uint64_t vhost_pa = 0;
+
+ *addr_type = PHYS_ADDR_INVALID;
+
+ for (regionidx = 0; regionidx < vdev->nregions_hpa; regionidx++) {
+ region = &vdev->regions_hpa[regionidx];
+ if ((guest_pa >= region->guest_phys_address) &&
+ (guest_pa <= region->guest_phys_address_end)) {
+ vhost_pa = region->host_phys_addr_offset + guest_pa;
+ if (likely((guest_pa + buf_len - 1)
+ <= region->guest_phys_address_end))
+ *addr_type = PHYS_ADDR_CONTINUOUS;
+ else
+ *addr_type = PHYS_ADDR_CROSS_SUBREG;
+ break;
+ }
+ }
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| HPA %p\n",
+ vdev->dev->device_fh, (void *)(uintptr_t)guest_pa,
+ (void *)(uintptr_t)vhost_pa);
+
+ return vhost_pa;
+}
+
+/*
+ * Compares a packet destination MAC address to a device MAC address.
+ */
+static inline int __attribute__((always_inline))
+ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
+{
+ return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0;
+}
+
+/*
+ * This function learns the MAC address of the device and registers this along with a
+ * vlan tag to a VMDQ.
+ */
+static int
+link_vmdq(struct vhost_dev *vdev, struct rte_mbuf *m)
+{
+ struct ether_hdr *pkt_hdr;
+ struct virtio_net_data_ll *dev_ll;
+ struct virtio_net *dev = vdev->dev;
+ int i, ret;
+
+ /* Learn MAC address of guest device from packet */
+ pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ dev_ll = ll_root_used;
+
+ while (dev_ll != NULL) {
+ if (ether_addr_cmp(&(pkt_hdr->s_addr), &dev_ll->vdev->mac_address)) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh);
+ return -1;
+ }
+ dev_ll = dev_ll->next;
+ }
+
+ for (i = 0; i < ETHER_ADDR_LEN; i++)
+ vdev->mac_address.addr_bytes[i] = pkt_hdr->s_addr.addr_bytes[i];
+
+ /* vlan_tag currently uses the device_id. */
+ vdev->vlan_tag = vlan_tags[dev->device_fh];
+
+ /* Print out VMDQ registration info. */
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n",
+ dev->device_fh,
+ vdev->mac_address.addr_bytes[0], vdev->mac_address.addr_bytes[1],
+ vdev->mac_address.addr_bytes[2], vdev->mac_address.addr_bytes[3],
+ vdev->mac_address.addr_bytes[4], vdev->mac_address.addr_bytes[5],
+ vdev->vlan_tag);
+
+ /* Register the MAC address. */
+ ret = rte_eth_dev_mac_addr_add(ports[0], &vdev->mac_address,
+ (uint32_t)dev->device_fh + vmdq_pool_base);
+ if (ret)
+ RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n",
+ dev->device_fh);
+
+ /* Enable stripping of the vlan tag as we handle routing. */
+ if (vlan_strip)
+ rte_eth_dev_set_vlan_strip_on_queue(ports[0],
+ (uint16_t)vdev->vmdq_rx_q, 1);
+
+ /* Set device as ready for RX. */
+ vdev->ready = DEVICE_RX;
+
+ return 0;
+}
+
+/*
+ * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
+ * queue before disabling RX on the device.
+ */
+static inline void
+unlink_vmdq(struct vhost_dev *vdev)
+{
+ unsigned i = 0;
+ unsigned rx_count;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+
+ if (vdev->ready == DEVICE_RX) {
+ /*clear MAC and VLAN settings*/
+ rte_eth_dev_mac_addr_remove(ports[0], &vdev->mac_address);
+ for (i = 0; i < 6; i++)
+ vdev->mac_address.addr_bytes[i] = 0;
+
+ vdev->vlan_tag = 0;
+
+ /*Clear out the receive buffers*/
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+
+ while (rx_count) {
+ for (i = 0; i < rx_count; i++)
+ rte_pktmbuf_free(pkts_burst[i]);
+
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+ }
+
+ vdev->ready = DEVICE_MAC_LEARNING;
+ }
+}
+
+/*
+ * Check if the packet destination MAC address is for a local device. If so then put
+ * the packet on that device's RX queue. If not then return.
+ */
+static inline int __attribute__((always_inline))
+virtio_tx_local(struct vhost_dev *vdev, struct rte_mbuf *m)
+{
+ struct virtio_net_data_ll *dev_ll;
+ struct ether_hdr *pkt_hdr;
+ uint64_t ret = 0;
+ struct virtio_net *dev = vdev->dev;
+	struct virtio_net *tdev; /* destination virtio device */
+
+ pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /*get the used devices list*/
+ dev_ll = ll_root_used;
+
+ while (dev_ll != NULL) {
+ if ((dev_ll->vdev->ready == DEVICE_RX) && ether_addr_cmp(&(pkt_hdr->d_addr),
+ &dev_ll->vdev->mac_address)) {
+
+ /* Drop the packet if the TX packet is destined for the TX device. */
+ if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. Dropping packet.\n",
+ dev->device_fh);
+ return 0;
+ }
+ tdev = dev_ll->vdev->dev;
+
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", tdev->device_fh);
+
+ if (unlikely(dev_ll->vdev->remove)) {
+ /*drop the packet if the device is marked for removal*/
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", tdev->device_fh);
+ } else {
+ /*send the packet to the local virtio device*/
+ ret = rte_vhost_enqueue_burst(tdev, VIRTIO_RXQ, &m, 1);
+ if (enable_stats) {
+ rte_atomic64_add(
+ &dev_statistics[tdev->device_fh].rx_total_atomic,
+ 1);
+ rte_atomic64_add(
+ &dev_statistics[tdev->device_fh].rx_atomic,
+ ret);
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx += ret;
+ }
+ }
+
+ return 0;
+ }
+ dev_ll = dev_ll->next;
+ }
+
+ return -1;
+}
+
+/*
+ * Check if the destination MAC of a packet belongs to a local VM,
+ * and if so get its VLAN tag and offset.
+ */
+static inline int __attribute__((always_inline))
+find_local_dest(struct virtio_net *dev, struct rte_mbuf *m,
+ uint32_t *offset, uint16_t *vlan_tag)
+{
+ struct virtio_net_data_ll *dev_ll = ll_root_used;
+ struct ether_hdr *pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ while (dev_ll != NULL) {
+ if ((dev_ll->vdev->ready == DEVICE_RX)
+ && ether_addr_cmp(&(pkt_hdr->d_addr),
+ &dev_ll->vdev->mac_address)) {
+ /*
+ * Drop the packet if the TX packet is
+ * destined for the TX device.
+ */
+ if (dev_ll->vdev->dev->device_fh == dev->device_fh) {
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") TX: Source and destination"
+ " MAC addresses are the same. Dropping "
+ "packet.\n",
+ dev_ll->vdev->dev->device_fh);
+ return -1;
+ }
+
+ /*
+			 * HW VLAN strip reduces the packet length by the
+			 * length of the VLAN tag, so the packet length
+			 * needs to be restored by adding it back.
+ */
+ *offset = VLAN_HLEN;
+ *vlan_tag =
+ (uint16_t)
+ vlan_tags[(uint16_t)dev_ll->vdev->dev->device_fh];
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") TX: pkt to local VM device id:"
+ "(%"PRIu64") vlan tag: %d.\n",
+ dev->device_fh, dev_ll->vdev->dev->device_fh,
+ (int)*vlan_tag);
+
+ break;
+ }
+ dev_ll = dev_ll->next;
+ }
+ return 0;
+}
+
+static uint16_t
+get_psd_sum(void *l3_hdr, uint64_t ol_flags)
+{
+ if (ol_flags & PKT_TX_IPV4)
+ return rte_ipv4_phdr_cksum(l3_hdr, ol_flags);
+ else /* assume ethertype == ETHER_TYPE_IPv6 */
+ return rte_ipv6_phdr_cksum(l3_hdr, ol_flags);
+}
+
+static void virtio_tx_offload(struct rte_mbuf *m)
+{
+ void *l3_hdr;
+ struct ipv4_hdr *ipv4_hdr = NULL;
+ struct tcp_hdr *tcp_hdr = NULL;
+ struct ether_hdr *eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ l3_hdr = (char *)eth_hdr + m->l2_len;
+
+ if (m->ol_flags & PKT_TX_IPV4) {
+ ipv4_hdr = l3_hdr;
+ ipv4_hdr->hdr_checksum = 0;
+ m->ol_flags |= PKT_TX_IP_CKSUM;
+ }
+
+ tcp_hdr = (struct tcp_hdr *)((char *)l3_hdr + m->l3_len);
+ tcp_hdr->cksum = get_psd_sum(l3_hdr, m->ol_flags);
+}
+
+/*
+ * This function routes the TX packet to the correct interface. This may be a local device
+ * or the physical port.
+ */
+static inline void __attribute__((always_inline))
+virtio_tx_route(struct vhost_dev *vdev, struct rte_mbuf *m, uint16_t vlan_tag)
+{
+ struct mbuf_table *tx_q;
+ struct rte_mbuf **m_table;
+ unsigned len, ret, offset = 0;
+ const uint16_t lcore_id = rte_lcore_id();
+ struct virtio_net *dev = vdev->dev;
+ struct ether_hdr *nh;
+
+ /*check if destination is local VM*/
+ if ((vm2vm_mode == VM2VM_SOFTWARE) && (virtio_tx_local(vdev, m) == 0)) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+
+ if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
+ if (unlikely(find_local_dest(dev, m, &offset, &vlan_tag) != 0)) {
+ rte_pktmbuf_free(m);
+ return;
+ }
+ }
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh);
+
+ /*Add packet to the port tx queue*/
+ tx_q = &lcore_tx_queue[lcore_id];
+ len = tx_q->len;
+
+ nh = rte_pktmbuf_mtod(m, struct ether_hdr *);
+ if (unlikely(nh->ether_type == rte_cpu_to_be_16(ETHER_TYPE_VLAN))) {
+ /* Guest has inserted the vlan tag. */
+ struct vlan_hdr *vh = (struct vlan_hdr *) (nh + 1);
+ uint16_t vlan_tag_be = rte_cpu_to_be_16(vlan_tag);
+ if ((vm2vm_mode == VM2VM_HARDWARE) &&
+ (vh->vlan_tci != vlan_tag_be))
+ vh->vlan_tci = vlan_tag_be;
+ } else {
+ m->ol_flags |= PKT_TX_VLAN_PKT;
+
+ /*
+ * Find the right seg to adjust the data len when offset is
+ * bigger than tail room size.
+ */
+ if (unlikely(vm2vm_mode == VM2VM_HARDWARE)) {
+ if (likely(offset <= rte_pktmbuf_tailroom(m)))
+ m->data_len += offset;
+ else {
+ struct rte_mbuf *seg = m;
+
+ while ((seg->next != NULL) &&
+ (offset > rte_pktmbuf_tailroom(seg)))
+ seg = seg->next;
+
+ seg->data_len += offset;
+ }
+ m->pkt_len += offset;
+ }
+
+ m->vlan_tci = vlan_tag;
+ }
+
+ if (m->ol_flags & PKT_TX_TCP_SEG)
+ virtio_tx_offload(m);
+
+ tx_q->m_table[len] = m;
+ len++;
+ if (enable_stats) {
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx++;
+ }
+
+ if (unlikely(len == MAX_PKT_BURST)) {
+ m_table = (struct rte_mbuf **)tx_q->m_table;
+ ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
+ /* Free any buffers not handled by TX and update the port stats. */
+ if (unlikely(ret < len)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < len);
+ }
+
+ len = 0;
+ }
+
+ tx_q->len = len;
+ return;
+}
+/*
+ * This function is called by each data core. It handles all RX/TX registered with the
+ * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
+ * with all devices in the main linked list.
+ */
+static int
+switch_worker(__attribute__((unused)) void *arg)
+{
+ struct rte_mempool *mbuf_pool = arg;
+ struct virtio_net *dev = NULL;
+ struct vhost_dev *vdev = NULL;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct virtio_net_data_ll *dev_ll;
+ struct mbuf_table *tx_q;
+ volatile struct lcore_ll_info *lcore_ll;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
+ unsigned ret, i;
+ const uint16_t lcore_id = rte_lcore_id();
+ const uint16_t num_cores = (uint16_t)rte_lcore_count();
+ uint16_t rx_count = 0;
+ uint16_t tx_count;
+ uint32_t retry = 0;
+
+	RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
+ lcore_ll = lcore_info[lcore_id].lcore_ll;
+ prev_tsc = 0;
+
+ tx_q = &lcore_tx_queue[lcore_id];
+ for (i = 0; i < num_cores; i ++) {
+ if (lcore_ids[i] == lcore_id) {
+ tx_q->txq_id = i;
+ break;
+ }
+ }
+
+ while(1) {
+ cur_tsc = rte_rdtsc();
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ if (tx_q->len) {
+ LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len);
+
+ /*Tx any packets in the queue*/
+ ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
+ (struct rte_mbuf **)tx_q->m_table,
+ (uint16_t)tx_q->len);
+ if (unlikely(ret < tx_q->len)) {
+ do {
+ rte_pktmbuf_free(tx_q->m_table[ret]);
+ } while (++ret < tx_q->len);
+ }
+
+ tx_q->len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+
+ }
+
+ rte_prefetch0(lcore_ll->ll_root_used);
+ /*
+ * Inform the configuration core that we have exited the linked list and that no devices are
+ * in use if requested.
+ */
+ if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
+ lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+
+ /*
+ * Process devices
+ */
+ dev_ll = lcore_ll->ll_root_used;
+
+ while (dev_ll != NULL) {
+ /*get virtio device ID*/
+ vdev = dev_ll->vdev;
+ dev = vdev->dev;
+
+ if (unlikely(vdev->remove)) {
+ dev_ll = dev_ll->next;
+ unlink_vmdq(vdev);
+ vdev->ready = DEVICE_SAFE_REMOVE;
+ continue;
+ }
+ if (likely(vdev->ready == DEVICE_RX)) {
+ /*Handle guest RX*/
+ rx_count = rte_eth_rx_burst(ports[0],
+ vdev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+
+ if (rx_count) {
+ /*
+					 * If retry is enabled and the queue is full then we wait and retry to avoid packet loss.
+					 * Here MAX_PKT_BURST must be less than the virtio queue size.
+ */
+ if (enable_retry && unlikely(rx_count > rte_vring_available_entries(dev, VIRTIO_RXQ))) {
+ for (retry = 0; retry < burst_rx_retry_num; retry++) {
+ rte_delay_us(burst_rx_delay_time);
+ if (rx_count <= rte_vring_available_entries(dev, VIRTIO_RXQ))
+ break;
+ }
+ }
+ ret_count = rte_vhost_enqueue_burst(dev, VIRTIO_RXQ, pkts_burst, rx_count);
+ if (enable_stats) {
+ rte_atomic64_add(
+ &dev_statistics[dev_ll->vdev->dev->device_fh].rx_total_atomic,
+ rx_count);
+ rte_atomic64_add(
+ &dev_statistics[dev_ll->vdev->dev->device_fh].rx_atomic, ret_count);
+ }
+ while (likely(rx_count)) {
+ rx_count--;
+ rte_pktmbuf_free(pkts_burst[rx_count]);
+ }
+
+ }
+ }
+
+ if (likely(!vdev->remove)) {
+ /* Handle guest TX*/
+ tx_count = rte_vhost_dequeue_burst(dev, VIRTIO_TXQ, mbuf_pool, pkts_burst, MAX_PKT_BURST);
+ /* If this is the first received packet we need to learn the MAC and setup VMDQ */
+ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING) && tx_count) {
+ if (vdev->remove || (link_vmdq(vdev, pkts_burst[0]) == -1)) {
+ while (tx_count)
+ rte_pktmbuf_free(pkts_burst[--tx_count]);
+ }
+ }
+ for (i = 0; i < tx_count; ++i) {
+ virtio_tx_route(vdev, pkts_burst[i],
+ vlan_tags[(uint16_t)dev->device_fh]);
+ }
+ }
+
+ /*move to the next device in the list*/
+ dev_ll = dev_ll->next;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * This function gets the number of available ring entries for zero copy RX.
+ * Only one thread will call this function for a particular virtio device,
+ * so it is designed as a non-thread-safe function.
+ */
+static inline uint32_t __attribute__((always_inline))
+get_available_ring_num_zcp(struct virtio_net *dev)
+{
+ struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
+ uint16_t avail_idx;
+
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+ return (uint32_t)(avail_idx - vq->last_used_idx_res);
+}
+
+/*
+ * This function gets available ring indexes for zero copy RX;
+ * it retries 'burst_rx_retry_num' times until it gets enough ring entries.
+ * Only one thread will call this function for a particular virtio device,
+ * so it is designed as a non-thread-safe function.
+ */
+static inline uint32_t __attribute__((always_inline))
+get_available_ring_index_zcp(struct virtio_net *dev,
+ uint16_t *res_base_idx, uint32_t count)
+{
+ struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_RXQ];
+ uint16_t avail_idx;
+ uint32_t retry = 0;
+ uint16_t free_entries;
+
+ *res_base_idx = vq->last_used_idx_res;
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+ free_entries = (avail_idx - *res_base_idx);
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") in get_available_ring_index_zcp: "
+ "avail idx: %d, "
+ "res base idx:%d, free entries:%d\n",
+ dev->device_fh, avail_idx, *res_base_idx,
+ free_entries);
+
+ /*
+ * If retry is enabled and the queue is full then we wait
+ * and retry to avoid packet loss.
+ */
+ if (enable_retry && unlikely(count > free_entries)) {
+ for (retry = 0; retry < burst_rx_retry_num; retry++) {
+ rte_delay_us(burst_rx_delay_time);
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+ free_entries = (avail_idx - *res_base_idx);
+ if (count <= free_entries)
+ break;
+ }
+ }
+
+ /*check that we have enough buffers*/
+ if (unlikely(count > free_entries))
+ count = free_entries;
+
+ if (unlikely(count == 0)) {
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") Fail in get_available_ring_index_zcp: "
+ "avail idx: %d, res base idx:%d, free entries:%d\n",
+ dev->device_fh, avail_idx,
+ *res_base_idx, free_entries);
+ return 0;
+ }
+
+ vq->last_used_idx_res = *res_base_idx + count;
+
+ return count;
+}
+
+/*
+ * This function puts a descriptor back on the used list.
+ */
+static inline void __attribute__((always_inline))
+put_desc_to_used_list_zcp(struct vhost_virtqueue *vq, uint16_t desc_idx)
+{
+ uint16_t res_cur_idx = vq->last_used_idx;
+ vq->used->ring[res_cur_idx & (vq->size - 1)].id = (uint32_t)desc_idx;
+ vq->used->ring[res_cur_idx & (vq->size - 1)].len = 0;
+ rte_compiler_barrier();
+ *(volatile uint16_t *)&vq->used->idx += 1;
+ vq->last_used_idx += 1;
+
+ /* Kick the guest if necessary. */
+ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ eventfd_write(vq->callfd, (eventfd_t)1);
+}
+
+/*
+ * This function gets an available descriptor from the virtio vring and an
+ * un-attached mbuf from vpool->ring, then attaches them together. It needs
+ * to adjust the offset of buff_addr and phys_addr according to the PMD
+ * implementation, otherwise the frame data may land at the wrong location
+ * in the mbuf.
+ */
+static inline void __attribute__((always_inline))
+attach_rxmbuf_zcp(struct virtio_net *dev)
+{
+ uint16_t res_base_idx, desc_idx;
+ uint64_t buff_addr, phys_addr;
+ struct vhost_virtqueue *vq;
+ struct vring_desc *desc;
+ void *obj = NULL;
+ struct rte_mbuf *mbuf;
+ struct vpool *vpool;
+ hpa_type addr_type;
+ struct vhost_dev *vdev = (struct vhost_dev *)dev->priv;
+
+ vpool = &vpool_array[vdev->vmdq_rx_q];
+ vq = dev->virtqueue[VIRTIO_RXQ];
+
+ do {
+ if (unlikely(get_available_ring_index_zcp(vdev->dev, &res_base_idx,
+ 1) != 1))
+ return;
+ desc_idx = vq->avail->ring[(res_base_idx) & (vq->size - 1)];
+
+ desc = &vq->desc[desc_idx];
+ if (desc->flags & VRING_DESC_F_NEXT) {
+ desc = &vq->desc[desc->next];
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len,
+ &addr_type);
+ } else {
+ buff_addr = gpa_to_vva(dev,
+ desc->addr + vq->vhost_hlen);
+ phys_addr = gpa_to_hpa(vdev,
+ desc->addr + vq->vhost_hlen,
+ desc->len, &addr_type);
+ }
+
+ if (unlikely(addr_type == PHYS_ADDR_INVALID)) {
+ RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Invalid frame buffer"
+ " address found when attaching RX frame buffer"
+ " address!\n", dev->device_fh);
+ put_desc_to_used_list_zcp(vq, desc_idx);
+ continue;
+ }
+
+ /*
+ * Check if the frame buffer address from guest crosses
+ * sub-region or not.
+ */
+ if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%"PRIu64") Frame buffer address cross "
+				"sub-region found when attaching RX frame "
+ "buffer address!\n",
+ dev->device_fh);
+ put_desc_to_used_list_zcp(vq, desc_idx);
+ continue;
+ }
+ } while (unlikely(phys_addr == 0));
+
+ rte_ring_sc_dequeue(vpool->ring, &obj);
+ mbuf = obj;
+ if (unlikely(mbuf == NULL)) {
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in attach_rxmbuf_zcp: "
+ "ring_sc_dequeue fail.\n",
+ dev->device_fh);
+ put_desc_to_used_list_zcp(vq, desc_idx);
+ return;
+ }
+
+ if (unlikely(vpool->buf_size > desc->len)) {
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in attach_rxmbuf_zcp: frame buffer "
+ "length(%d) of descriptor idx: %d less than room "
+ "size required: %d\n",
+ dev->device_fh, desc->len, desc_idx, vpool->buf_size);
+ put_desc_to_used_list_zcp(vq, desc_idx);
+ rte_ring_sp_enqueue(vpool->ring, obj);
+ return;
+ }
+
+ mbuf->buf_addr = (void *)(uintptr_t)(buff_addr - RTE_PKTMBUF_HEADROOM);
+ mbuf->data_off = RTE_PKTMBUF_HEADROOM;
+ mbuf->buf_physaddr = phys_addr - RTE_PKTMBUF_HEADROOM;
+ mbuf->data_len = desc->len;
+ MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in attach_rxmbuf_zcp: res base idx:%d, "
+ "descriptor idx:%d\n",
+ dev->device_fh, res_base_idx, desc_idx);
+
+ __rte_mbuf_raw_free(mbuf);
+
+ return;
+}
+
+/*
+ * Detach an attached packet mbuf -
+ * - restore original mbuf address and length values.
+ * - reset pktmbuf data and data_len to their default values.
+ * All other fields of the given packet mbuf will be left intact.
+ *
+ * @param m
+ * The attached packet mbuf.
+ */
+static inline void pktmbuf_detach_zcp(struct rte_mbuf *m)
+{
+ const struct rte_mempool *mp = m->pool;
+ void *buf = rte_mbuf_to_baddr(m);
+ uint32_t buf_ofs;
+ uint32_t buf_len = mp->elt_size - sizeof(*m);
+ m->buf_physaddr = rte_mempool_virt2phy(mp, m) + sizeof(*m);
+
+ m->buf_addr = buf;
+ m->buf_len = (uint16_t)buf_len;
+
+ buf_ofs = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ?
+ RTE_PKTMBUF_HEADROOM : m->buf_len;
+ m->data_off = buf_ofs;
+
+ m->data_len = 0;
+}
+
+/*
+ * This function is called after packets have been transmitted. It fetches
+ * mbufs from vpool->pool, detaches them and puts them into vpool->ring. It
+ * also updates the used index and kicks the guest if necessary.
+ */
+static inline uint32_t __attribute__((always_inline))
+txmbuf_clean_zcp(struct virtio_net *dev, struct vpool *vpool)
+{
+ struct rte_mbuf *mbuf;
+ struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];
+ uint32_t used_idx = vq->last_used_idx & (vq->size - 1);
+ uint32_t index = 0;
+ uint32_t mbuf_count = rte_mempool_count(vpool->pool);
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool before "
+ "clean is: %d\n",
+ dev->device_fh, mbuf_count);
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring before "
+ "clean is : %d\n",
+ dev->device_fh, rte_ring_count(vpool->ring));
+
+ for (index = 0; index < mbuf_count; index++) {
+ mbuf = __rte_mbuf_raw_alloc(vpool->pool);
+ if (likely(MBUF_EXT_MEM(mbuf)))
+ pktmbuf_detach_zcp(mbuf);
+ rte_ring_sp_enqueue(vpool->ring, mbuf);
+
+ /* Update used index buffer information. */
+ vq->used->ring[used_idx].id = MBUF_HEADROOM_UINT32(mbuf);
+ vq->used->ring[used_idx].len = 0;
+
+ used_idx = (used_idx + 1) & (vq->size - 1);
+ }
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in mempool after "
+ "clean is: %d\n",
+ dev->device_fh, rte_mempool_count(vpool->pool));
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: mbuf count in ring after "
+ "clean is : %d\n",
+ dev->device_fh, rte_ring_count(vpool->ring));
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: before updated "
+ "vq->last_used_idx:%d\n",
+ dev->device_fh, vq->last_used_idx);
+
+ vq->last_used_idx += mbuf_count;
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in txmbuf_clean_zcp: after updated "
+ "vq->last_used_idx:%d\n",
+ dev->device_fh, vq->last_used_idx);
+
+ rte_compiler_barrier();
+
+ *(volatile uint16_t *)&vq->used->idx += mbuf_count;
+
+ /* Kick guest if required. */
+ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ eventfd_write(vq->callfd, (eventfd_t)1);
+
+ return 0;
+}
+
+/*
+ * This function is called when a virtio device is destroyed.
+ * It fetches mbufs from vpool->pool, detaches them, and puts them into vpool->ring.
+ */
+static void mbuf_destroy_zcp(struct vpool *vpool)
+{
+ struct rte_mbuf *mbuf = NULL;
+ uint32_t index, mbuf_count = rte_mempool_count(vpool->pool);
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "in mbuf_destroy_zcp: mbuf count in mempool before "
+ "mbuf_destroy_zcp is: %d\n",
+ mbuf_count);
+ LOG_DEBUG(VHOST_CONFIG,
+ "in mbuf_destroy_zcp: mbuf count in ring before "
+ "mbuf_destroy_zcp is : %d\n",
+ rte_ring_count(vpool->ring));
+
+ for (index = 0; index < mbuf_count; index++) {
+ mbuf = __rte_mbuf_raw_alloc(vpool->pool);
+ if (likely(mbuf != NULL)) {
+ if (likely(MBUF_EXT_MEM(mbuf)))
+ pktmbuf_detach_zcp(mbuf);
+ rte_ring_sp_enqueue(vpool->ring, (void *)mbuf);
+ }
+ }
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "in mbuf_destroy_zcp: mbuf count in mempool after "
+ "mbuf_destroy_zcp is: %d\n",
+ rte_mempool_count(vpool->pool));
+ LOG_DEBUG(VHOST_CONFIG,
+ "in mbuf_destroy_zcp: mbuf count in ring after "
+ "mbuf_destroy_zcp is : %d\n",
+ rte_ring_count(vpool->ring));
+}
+
+/*
+ * This function updates the use flag and counter.
+ */
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_rx_zcp(struct virtio_net *dev, struct rte_mbuf **pkts,
+ uint32_t count)
+{
+ struct vhost_virtqueue *vq;
+ struct vring_desc *desc;
+ struct rte_mbuf *buff;
+ /* The virtio_hdr is initialised to 0. */
+ struct virtio_net_hdr_mrg_rxbuf virtio_hdr
+ = {{0, 0, 0, 0, 0, 0}, 0};
+ uint64_t buff_hdr_addr = 0;
+ uint32_t head[MAX_PKT_BURST], packet_len = 0;
+ uint32_t head_idx, packet_success = 0;
+ uint16_t res_cur_idx;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+
+ if (count == 0)
+ return 0;
+
+ vq = dev->virtqueue[VIRTIO_RXQ];
+ count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
+
+ res_cur_idx = vq->last_used_idx;
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n",
+ dev->device_fh, res_cur_idx, res_cur_idx + count);
+
+ /* Retrieve all of the head indexes first to avoid caching issues. */
+ for (head_idx = 0; head_idx < count; head_idx++)
+ head[head_idx] = MBUF_HEADROOM_UINT32(pkts[head_idx]);
+
+ /*Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+
+ while (packet_success != count) {
+ /* Get descriptor from available ring */
+ desc = &vq->desc[head[packet_success]];
+
+ buff = pkts[packet_success];
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in dev_rx_zcp: update the used idx for "
+ "pkt[%d] descriptor idx: %d\n",
+ dev->device_fh, packet_success,
+ MBUF_HEADROOM_UINT32(buff));
+
+ PRINT_PACKET(dev,
+ (uintptr_t)(((uint64_t)(uintptr_t)buff->buf_addr)
+ + RTE_PKTMBUF_HEADROOM),
+ rte_pktmbuf_data_len(buff), 0);
+
+ /* Buffer address translation for virtio header. */
+ buff_hdr_addr = gpa_to_vva(dev, desc->addr);
+ packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
+
+ /*
+ * If the descriptors are chained the header and data are
+ * placed in separate buffers.
+ */
+ if (desc->flags & VRING_DESC_F_NEXT) {
+ desc->len = vq->vhost_hlen;
+ desc = &vq->desc[desc->next];
+ desc->len = rte_pktmbuf_data_len(buff);
+ } else {
+ desc->len = packet_len;
+ }
+
+ /* Update used ring with desc information */
+ vq->used->ring[res_cur_idx & (vq->size - 1)].id
+ = head[packet_success];
+ vq->used->ring[res_cur_idx & (vq->size - 1)].len
+ = packet_len;
+ res_cur_idx++;
+ packet_success++;
+
+ /* A header is required per buffer. */
+ rte_memcpy((void *)(uintptr_t)buff_hdr_addr,
+ (const void *)&virtio_hdr, vq->vhost_hlen);
+
+ PRINT_PACKET(dev, (uintptr_t)buff_hdr_addr, vq->vhost_hlen, 1);
+
+ if (likely(packet_success < count)) {
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+ }
+ }
+
+ rte_compiler_barrier();
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in dev_rx_zcp: before update used idx: "
+ "vq.last_used_idx: %d, vq->used->idx: %d\n",
+ dev->device_fh, vq->last_used_idx, vq->used->idx);
+
+ *(volatile uint16_t *)&vq->used->idx += count;
+ vq->last_used_idx += count;
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in dev_rx_zcp: after update used idx: "
+ "vq.last_used_idx: %d, vq->used->idx: %d\n",
+ dev->device_fh, vq->last_used_idx, vq->used->idx);
+
+ /* Kick the guest if necessary. */
+ if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT))
+ eventfd_write(vq->callfd, (eventfd_t)1);
+
+ return count;
+}
+
+/*
+ * This function routes the TX packet to the correct interface.
+ * This may be a local device or the physical port.
+ */
+static inline void __attribute__((always_inline))
+virtio_tx_route_zcp(struct virtio_net *dev, struct rte_mbuf *m,
+ uint32_t desc_idx, uint8_t need_copy)
+{
+ struct mbuf_table *tx_q;
+ struct rte_mbuf **m_table;
+ void *obj = NULL;
+ struct rte_mbuf *mbuf;
+ unsigned len, ret, offset = 0;
+ struct vpool *vpool;
+ uint16_t vlan_tag = (uint16_t)vlan_tags[(uint16_t)dev->device_fh];
+ uint16_t vmdq_rx_q = ((struct vhost_dev *)dev->priv)->vmdq_rx_q;
+
+ /* Add packet to the port tx queue */
+ tx_q = &tx_queue_zcp[vmdq_rx_q];
+ len = tx_q->len;
+
+ /* Allocate an mbuf and populate the structure. */
+ vpool = &vpool_array[MAX_QUEUES + vmdq_rx_q];
+ rte_ring_sc_dequeue(vpool->ring, &obj);
+ mbuf = obj;
+ if (unlikely(mbuf == NULL)) {
+ struct vhost_virtqueue *vq = dev->virtqueue[VIRTIO_TXQ];
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%"PRIu64") Failed to allocate memory for mbuf.\n",
+ dev->device_fh);
+ put_desc_to_used_list_zcp(vq, desc_idx);
+ return;
+ }
+
+ if (vm2vm_mode == VM2VM_HARDWARE) {
+ /* Avoid using a VLAN tag from any VM for an external packet,
+ * such as vlan_tags[dev->device_fh]; otherwise it conflicts
+ * during pool selection: the MAC address identifies it as an
+ * external packet that should go out to the network, while the
+ * VLAN tag identifies it as a VM-to-VM packet that should be
+ * forwarded to another VM. The hardware cannot resolve such an
+ * ambiguity, so the packet would be lost.
+ */
+ vlan_tag = external_pkt_default_vlan_tag;
+ if (find_local_dest(dev, m, &offset, &vlan_tag) != 0) {
+ MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;
+ __rte_mbuf_raw_free(mbuf);
+ return;
+ }
+ }
+
+ mbuf->nb_segs = m->nb_segs;
+ mbuf->next = m->next;
+ mbuf->data_len = m->data_len + offset;
+ mbuf->pkt_len = mbuf->data_len;
+ if (unlikely(need_copy)) {
+ /* Copy the packet contents to the mbuf. */
+ rte_memcpy(rte_pktmbuf_mtod(mbuf, void *),
+ rte_pktmbuf_mtod(m, void *),
+ m->data_len);
+ } else {
+ mbuf->data_off = m->data_off;
+ mbuf->buf_physaddr = m->buf_physaddr;
+ mbuf->buf_addr = m->buf_addr;
+ }
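+ /*
+ * Note: in the zero-copy branch above the new mbuf simply aliases the
+ * guest buffer (same buf_addr/buf_physaddr), so no payload copy is made;
+ * the copy path is only taken when the guest buffer crosses a physically
+ * non-contiguous sub-region.
+ */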
+ mbuf->ol_flags |= PKT_TX_VLAN_PKT;
+ mbuf->vlan_tci = vlan_tag;
+ mbuf->l2_len = sizeof(struct ether_hdr);
+ mbuf->l3_len = sizeof(struct ipv4_hdr);
+ MBUF_HEADROOM_UINT32(mbuf) = (uint32_t)desc_idx;
+
+ tx_q->m_table[len] = mbuf;
+ len++;
+
+ LOG_DEBUG(VHOST_DATA,
+ "(%"PRIu64") in tx_route_zcp: pkt: nb_seg: %d, next:%s\n",
+ dev->device_fh,
+ mbuf->nb_segs,
+ (mbuf->next == NULL) ? "null" : "non-null");
+
+ if (enable_stats) {
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx++;
+ }
+
+ if (unlikely(len == MAX_PKT_BURST)) {
+ m_table = (struct rte_mbuf **)tx_q->m_table;
+ ret = rte_eth_tx_burst(ports[0],
+ (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
+
+ /*
+ * Free any buffers not handled by TX and update
+ * the port stats.
+ */
+ if (unlikely(ret < len)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < len);
+ }
+
+ len = 0;
+ txmbuf_clean_zcp(dev, vpool);
+ }
+
+ tx_q->len = len;
+
+ return;
+}
+
+/*
+ * This function transmits all available packets in the virtio TX queue of one
+ * virtio-net device. On the first packet it learns the MAC address and sets
+ * up VMDQ.
+ */
+static inline void __attribute__((always_inline))
+virtio_dev_tx_zcp(struct virtio_net *dev)
+{
+ struct rte_mbuf m;
+ struct vhost_virtqueue *vq;
+ struct vring_desc *desc;
+ uint64_t buff_addr = 0, phys_addr;
+ uint32_t head[MAX_PKT_BURST];
+ uint32_t i;
+ uint16_t free_entries, packet_success = 0;
+ uint16_t avail_idx;
+ uint8_t need_copy = 0;
+ hpa_type addr_type;
+ struct vhost_dev *vdev = (struct vhost_dev *)dev->priv;
+
+ vq = dev->virtqueue[VIRTIO_TXQ];
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+
+ /* If there are no available buffers then return. */
+ if (vq->last_used_idx_res == avail_idx)
+ return;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx_zcp()\n", dev->device_fh);
+
+ /* Prefetch available ring to retrieve head indexes. */
+ rte_prefetch0(&vq->avail->ring[vq->last_used_idx_res & (vq->size - 1)]);
+
+ /* Get the number of free entries in the ring */
+ free_entries = (avail_idx - vq->last_used_idx_res);
+
+ /* Limit to MAX_PKT_BURST. */
+ free_entries
+ = (free_entries > MAX_PKT_BURST) ? MAX_PKT_BURST : free_entries;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n",
+ dev->device_fh, free_entries);
+
+ /* Retrieve all of the head indexes first to avoid caching issues. */
+ for (i = 0; i < free_entries; i++)
+ head[i]
+ = vq->avail->ring[(vq->last_used_idx_res + i)
+ & (vq->size - 1)];
+
+ vq->last_used_idx_res += free_entries;
+
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+ rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]);
+
+ while (packet_success < free_entries) {
+ desc = &vq->desc[head[packet_success]];
+
+ /* Discard first buffer as it is the virtio header */
+ desc = &vq->desc[desc->next];
+
+ /* Buffer address translation. */
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ /* Check for an extra VLAN_HLEN bytes needed to insert the VLAN tag */
+ phys_addr = gpa_to_hpa(vdev, desc->addr, desc->len + VLAN_HLEN,
+ &addr_type);
+
+ if (likely(packet_success < (free_entries - 1)))
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success + 1]]);
+
+ if (unlikely(addr_type == PHYS_ADDR_INVALID)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%"PRIu64") Invalid frame buffer address found "
+ "when transmitting packets!\n",
+ dev->device_fh);
+ packet_success++;
+ continue;
+ }
+
+ /* Prefetch buffer address. */
+ rte_prefetch0((void *)(uintptr_t)buff_addr);
+
+ /*
+ * Setup dummy mbuf. This is copied to a real mbuf if
+ * transmitted out the physical port.
+ */
+ m.data_len = desc->len;
+ m.nb_segs = 1;
+ m.next = NULL;
+ m.data_off = 0;
+ m.buf_addr = (void *)(uintptr_t)buff_addr;
+ m.buf_physaddr = phys_addr;
+
+ /*
+ * Check if the frame buffer address from guest crosses
+ * sub-region or not.
+ */
+ if (unlikely(addr_type == PHYS_ADDR_CROSS_SUBREG)) {
+ RTE_LOG(ERR, VHOST_DATA,
+ "(%"PRIu64") Frame buffer address crossing a "
+ "sub-region found when attaching TX frame "
+ "buffer address!\n",
+ dev->device_fh);
+ need_copy = 1;
+ } else
+ need_copy = 0;
+
+ PRINT_PACKET(dev, (uintptr_t)buff_addr, desc->len, 0);
+
+ /*
+ * If this is the first received packet we need to learn
+ * the MAC address and set up VMDQ.
+ */
+ if (unlikely(vdev->ready == DEVICE_MAC_LEARNING)) {
+ if (vdev->remove || (link_vmdq(vdev, &m) == -1)) {
+ /*
+ * Discard frame if device is scheduled for
+ * removal or a duplicate MAC address is found.
+ */
+ packet_success += free_entries;
+ vq->last_used_idx += packet_success;
+ break;
+ }
+ }
+
+ virtio_tx_route_zcp(dev, &m, head[packet_success], need_copy);
+ packet_success++;
+ }
+}
+
+/*
+ * This function is called by each data core. It handles all RX/TX registered
+ * with the core. For TX the specific lcore linked list is used. For RX, MAC
+ * addresses are compared with all devices in the main linked list.
+ */
+static int
+switch_worker_zcp(__attribute__((unused)) void *arg)
+{
+ struct virtio_net *dev = NULL;
+ struct vhost_dev *vdev = NULL;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct virtio_net_data_ll *dev_ll;
+ struct mbuf_table *tx_q;
+ volatile struct lcore_ll_info *lcore_ll;
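+ /* TX drain period (BURST_TX_DRAIN_US microseconds) expressed in TSC cycles. */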
+ const uint64_t drain_tsc
+ = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S
+ * BURST_TX_DRAIN_US;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
+ unsigned ret;
+ const uint16_t lcore_id = rte_lcore_id();
+ uint16_t count_in_ring, rx_count = 0;
+
+ RTE_LOG(INFO, VHOST_DATA, "Processing on Core %u started\n", lcore_id);
+
+ lcore_ll = lcore_info[lcore_id].lcore_ll;
+ prev_tsc = 0;
+
+ while (1) {
+ cur_tsc = rte_rdtsc();
+
+ /* TX burst queue drain */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+ /*
+ * Get mbufs from vpool.pool, detach them and
+ * put them back into vpool.ring.
+ */
+ dev_ll = lcore_ll->ll_root_used;
+ while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {
+ /* Get virtio device ID */
+ vdev = dev_ll->vdev;
+ dev = vdev->dev;
+
+ if (likely(!vdev->remove)) {
+ tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q];
+ if (tx_q->len) {
+ LOG_DEBUG(VHOST_DATA,
+ "TX queue drained after timeout"
+ " with burst size %u\n",
+ tx_q->len);
+
+ /*
+ * Tx any packets in the queue
+ */
+ ret = rte_eth_tx_burst(
+ ports[0],
+ (uint16_t)tx_q->txq_id,
+ (struct rte_mbuf **)
+ tx_q->m_table,
+ (uint16_t)tx_q->len);
+ if (unlikely(ret < tx_q->len)) {
+ do {
+ rte_pktmbuf_free(
+ tx_q->m_table[ret]);
+ } while (++ret < tx_q->len);
+ }
+ tx_q->len = 0;
+
+ txmbuf_clean_zcp(dev,
+ &vpool_array[MAX_QUEUES+vdev->vmdq_rx_q]);
+ }
+ }
+ dev_ll = dev_ll->next;
+ }
+ prev_tsc = cur_tsc;
+ }
+
+ rte_prefetch0(lcore_ll->ll_root_used);
+
+ /*
+ * Inform the configuration core that we have exited the linked
+ * list and that no devices are in use if requested.
+ */
+ if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
+ lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+
+ /* Process devices */
+ dev_ll = lcore_ll->ll_root_used;
+
+ while ((dev_ll != NULL) && (dev_ll->vdev != NULL)) {
+ vdev = dev_ll->vdev;
+ dev = vdev->dev;
+ if (unlikely(vdev->remove)) {
+ dev_ll = dev_ll->next;
+ unlink_vmdq(vdev);
+ vdev->ready = DEVICE_SAFE_REMOVE;
+ continue;
+ }
+
+ if (likely(vdev->ready == DEVICE_RX)) {
+ uint32_t index = vdev->vmdq_rx_q;
+ uint16_t i;
+ count_in_ring
+ = rte_ring_count(vpool_array[index].ring);
+ uint16_t free_entries
+ = (uint16_t)get_available_ring_num_zcp(dev);
+
+ /*
+ * Attach all mbufs in vpool.ring and put them
+ * back into vpool.pool.
+ */
+ for (i = 0;
+ i < RTE_MIN(free_entries,
+ RTE_MIN(count_in_ring, MAX_PKT_BURST));
+ i++)
+ attach_rxmbuf_zcp(dev);
+
+ /* Handle guest RX */
+ rx_count = rte_eth_rx_burst(ports[0],
+ vdev->vmdq_rx_q, pkts_burst,
+ MAX_PKT_BURST);
+
+ if (rx_count) {
+ ret_count = virtio_dev_rx_zcp(dev,
+ pkts_burst, rx_count);
+ if (enable_stats) {
+ dev_statistics[dev->device_fh].rx_total
+ += rx_count;
+ dev_statistics[dev->device_fh].rx
+ += ret_count;
+ }
+ while (likely(rx_count)) {
+ rx_count--;
+ pktmbuf_detach_zcp(
+ pkts_burst[rx_count]);
+ rte_ring_sp_enqueue(
+ vpool_array[index].ring,
+ (void *)pkts_burst[rx_count]);
+ }
+ }
+ }
+
+ if (likely(!vdev->remove))
+ /* Handle guest TX */
+ virtio_dev_tx_zcp(dev);
+
+ /* Move to the next device in the list */
+ dev_ll = dev_ll->next;
+ }
+ }
+
+ return 0;
+}
+
+
+/*
+ * Add an entry to a used linked list. A free entry must first be found
+ * in the free linked list using get_data_ll_free_entry();
+ */
+static void
+add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ /* Set next as NULL and use a compiler barrier to avoid reordering. */
+ ll_dev->next = NULL;
+ rte_compiler_barrier();
+
+ /* If ll == NULL then this is the first device. */
+ if (ll) {
+ /* Increment to the tail of the linked list. */
+ while ((ll->next != NULL) )
+ ll = ll->next;
+
+ ll->next = ll_dev;
+ } else {
+ *ll_root_addr = ll_dev;
+ }
+}
+
+/*
+ * Remove an entry from a used linked list. The entry must then be added to
+ * the free linked list using put_data_ll_free_entry().
+ */
+static void
+rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev,
+ struct virtio_net_data_ll *ll_dev_last)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ if (unlikely((ll == NULL) || (ll_dev == NULL)))
+ return;
+
+ if (ll_dev == ll)
+ *ll_root_addr = ll_dev->next;
+ else
+ if (likely(ll_dev_last != NULL))
+ ll_dev_last->next = ll_dev->next;
+ else
+ RTE_LOG(ERR, VHOST_CONFIG, "Remove entry from ll failed.\n");
+}
+
+/*
+ * Find and return an entry from the free linked list.
+ */
+static struct virtio_net_data_ll *
+get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+ struct virtio_net_data_ll *ll_dev;
+
+ if (ll_free == NULL)
+ return NULL;
+
+ ll_dev = ll_free;
+ *ll_root_addr = ll_free->next;
+
+ return ll_dev;
+}
+
+/*
+ * Place an entry back on to the free linked list.
+ */
+static void
+put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr,
+ struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+
+ if (ll_dev == NULL)
+ return;
+
+ ll_dev->next = ll_free;
+ *ll_root_addr = ll_dev;
+}
+
+/*
+ * Creates a linked list of a given size.
+ */
+static struct virtio_net_data_ll *
+alloc_data_ll(uint32_t size)
+{
+ struct virtio_net_data_ll *ll_new;
+ uint32_t i;
+
+ /* Malloc and then chain the linked list. */
+ ll_new = malloc(size * sizeof(struct virtio_net_data_ll));
+ if (ll_new == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n");
+ return NULL;
+ }
+
+ for (i = 0; i < size - 1; i++) {
+ ll_new[i].vdev = NULL;
+ ll_new[i].next = &ll_new[i+1];
+ }
+ ll_new[i].next = NULL;
+
+ return ll_new;
+}
+
+/*
+ * Create the main linked list along with each individual core's linked list. A used and a free list
+ * are created to manage entries.
+ */
+static int
+init_data_ll (void)
+{
+ int lcore;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info));
+ if (lcore_info[lcore].lcore_ll == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n");
+ return -1;
+ }
+
+ lcore_info[lcore].lcore_ll->device_num = 0;
+ lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+ lcore_info[lcore].lcore_ll->ll_root_used = NULL;
+ if (num_devices % num_switching_cores)
+ lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1);
+ else
+ lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores);
+ }
+
+ /* Allocate devices up to a maximum of MAX_DEVICES. */
+ ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));
+
+ return 0;
+}
+
+/*
+ * Remove a device from the specific data core linked list and from the main linked list. Synchronization
+ * occurs through the use of the lcore dev_removal_flag. The device is made volatile here to avoid re-ordering
+ * of dev->remove=1, which can cause an infinite loop in the rte_pause loop.
+ */
+static void
+destroy_device (volatile struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_lcore_dev_cur;
+ struct virtio_net_data_ll *ll_main_dev_cur;
+ struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
+ struct virtio_net_data_ll *ll_main_dev_last = NULL;
+ struct vhost_dev *vdev;
+ int lcore;
+
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+
+ vdev = (struct vhost_dev *)dev->priv;
+ /* Set the remove flag. */
+ vdev->remove = 1;
+ while(vdev->ready != DEVICE_SAFE_REMOVE) {
+ rte_pause();
+ }
+
+ /* Search for entry to be removed from lcore ll */
+ ll_lcore_dev_cur = lcore_info[vdev->coreid].lcore_ll->ll_root_used;
+ while (ll_lcore_dev_cur != NULL) {
+ if (ll_lcore_dev_cur->vdev == vdev) {
+ break;
+ } else {
+ ll_lcore_dev_last = ll_lcore_dev_cur;
+ ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+ }
+ }
+
+ if (ll_lcore_dev_cur == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "(%"PRIu64") Failed to find the dev to be destroyed.\n",
+ dev->device_fh);
+ return;
+ }
+
+ /* Search for entry to be removed from main ll */
+ ll_main_dev_cur = ll_root_used;
+ ll_main_dev_last = NULL;
+ while (ll_main_dev_cur != NULL) {
+ if (ll_main_dev_cur->vdev == vdev) {
+ break;
+ } else {
+ ll_main_dev_last = ll_main_dev_cur;
+ ll_main_dev_cur = ll_main_dev_cur->next;
+ }
+ }
+
+ /* Remove entries from the lcore and main ll. */
+ rm_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
+ rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
+
+ /* Set the dev_removal_flag on each lcore. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
+ }
+
+ /*
+ * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
+ * they can no longer access the device removed from the linked lists and that the devices
+ * are no longer in use.
+ */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {
+ rte_pause();
+ }
+ }
+
+ /* Add the entries back to the lcore and main free ll.*/
+ put_data_ll_free_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
+ put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+
+ /* Decrement number of device on the lcore. */
+ lcore_info[vdev->coreid].lcore_ll->device_num--;
+
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
+
+ if (zero_copy) {
+ struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
+
+ /* Stop the RX queue. */
+ if (rte_eth_dev_rx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") In destroy_device: Failed to stop "
+ "rx queue:%d\n",
+ dev->device_fh,
+ vdev->vmdq_rx_q);
+ }
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") in destroy_device: Start put mbuf in "
+ "mempool back to ring for RX queue: %d\n",
+ dev->device_fh, vdev->vmdq_rx_q);
+
+ mbuf_destroy_zcp(vpool);
+
+ /* Stop the TX queue. */
+ if (rte_eth_dev_tx_queue_stop(ports[0], vdev->vmdq_rx_q) != 0) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") In destroy_device: Failed to "
+ "stop tx queue:%d\n",
+ dev->device_fh, vdev->vmdq_rx_q);
+ }
+
+ vpool = &vpool_array[vdev->vmdq_rx_q + MAX_QUEUES];
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") destroy_device: Start put mbuf in mempool "
+ "back to ring for TX queue: %d, dev:(%"PRIu64")\n",
+ dev->device_fh, (vdev->vmdq_rx_q + MAX_QUEUES),
+ dev->device_fh);
+
+ mbuf_destroy_zcp(vpool);
+ rte_free(vdev->regions_hpa);
+ }
+ rte_free(vdev);
+
+}
+
+/*
+ * Count the breaks in host physical contiguity (i.e. the number of extra
+ * sub-regions) within one particular region whose vhost virtual address
+ * range is contiguous. The region starts at vva_start and spans 'size' bytes.
+ */
+static uint32_t
+check_hpa_regions(uint64_t vva_start, uint64_t size)
+{
+ uint32_t i, nregions = 0, page_size = getpagesize();
+ uint64_t cur_phys_addr = 0, next_phys_addr = 0;
+ if (vva_start % page_size) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "in check_hpa_regions: vva start(%p) mod page_size(%d) "
+ "has remainder\n",
+ (void *)(uintptr_t)vva_start, page_size);
+ return 0;
+ }
+ if (size % page_size) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "in check_hpa_regions: "
+ "size((%"PRIu64")) mod page_size(%d) has remainder\n",
+ size, page_size);
+ return 0;
+ }
+ for (i = 0; i < size - page_size; i = i + page_size) {
+ cur_phys_addr
+ = rte_mem_virt2phy((void *)(uintptr_t)(vva_start + i));
+ next_phys_addr = rte_mem_virt2phy(
+ (void *)(uintptr_t)(vva_start + i + page_size));
+ if ((cur_phys_addr + page_size) != next_phys_addr) {
+ ++nregions;
+ LOG_DEBUG(VHOST_CONFIG,
+ "in check_hpa_regions: hva addr:(%p) is not "
+ "continuous with hva addr:(%p), diff:%d\n",
+ (void *)(uintptr_t)(vva_start + (uint64_t)i),
+ (void *)(uintptr_t)(vva_start + (uint64_t)i
+ + page_size), page_size);
+ LOG_DEBUG(VHOST_CONFIG,
+ "in check_hpa_regions: hpa addr:(%p) is not "
+ "continuous with hpa addr:(%p), "
+ "diff:(%"PRIu64")\n",
+ (void *)(uintptr_t)cur_phys_addr,
+ (void *)(uintptr_t)next_phys_addr,
+ (next_phys_addr-cur_phys_addr));
+ }
+ }
+ return nregions;
+}
+
+/*
+ * Divide each region whose vhost virtual address is contiguous into a few
+ * sub-regions so that the physical addresses within each sub-region are
+ * contiguous, and fill the offset (to GPA), size and other information of
+ * each sub-region into regions_hpa.
+ */
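+/*
+ * For example, a guest memory region that is virtually contiguous but has one
+ * break in host physical contiguity is split into two sub-regions, each with
+ * its own host_phys_addr_offset.
+ */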
+static uint32_t
+fill_hpa_memory_regions(struct virtio_memory_regions_hpa *mem_region_hpa, struct virtio_memory *virtio_memory)
+{
+ uint32_t regionidx, regionidx_hpa = 0, i, k, page_size = getpagesize();
+ uint64_t cur_phys_addr = 0, next_phys_addr = 0, vva_start;
+
+ if (mem_region_hpa == NULL)
+ return 0;
+
+ for (regionidx = 0; regionidx < virtio_memory->nregions; regionidx++) {
+ vva_start = virtio_memory->regions[regionidx].guest_phys_address +
+ virtio_memory->regions[regionidx].address_offset;
+ mem_region_hpa[regionidx_hpa].guest_phys_address
+ = virtio_memory->regions[regionidx].guest_phys_address;
+ mem_region_hpa[regionidx_hpa].host_phys_addr_offset =
+ rte_mem_virt2phy((void *)(uintptr_t)(vva_start)) -
+ mem_region_hpa[regionidx_hpa].guest_phys_address;
+ LOG_DEBUG(VHOST_CONFIG,
+ "in fill_hpa_regions: guest phys addr start[%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].guest_phys_address));
+ LOG_DEBUG(VHOST_CONFIG,
+ "in fill_hpa_regions: host phys addr start[%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].host_phys_addr_offset));
+ for (i = 0, k = 0;
+ i < virtio_memory->regions[regionidx].memory_size -
+ page_size;
+ i += page_size) {
+ cur_phys_addr = rte_mem_virt2phy(
+ (void *)(uintptr_t)(vva_start + i));
+ next_phys_addr = rte_mem_virt2phy(
+ (void *)(uintptr_t)(vva_start +
+ i + page_size));
+ if ((cur_phys_addr + page_size) != next_phys_addr) {
+ mem_region_hpa[regionidx_hpa].guest_phys_address_end =
+ mem_region_hpa[regionidx_hpa].guest_phys_address +
+ k + page_size;
+ mem_region_hpa[regionidx_hpa].memory_size
+ = k + page_size;
+ LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest "
+ "phys addr end [%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].guest_phys_address_end));
+ LOG_DEBUG(VHOST_CONFIG,
+ "in fill_hpa_regions: guest phys addr "
+ "size [%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].memory_size));
+ mem_region_hpa[regionidx_hpa + 1].guest_phys_address
+ = mem_region_hpa[regionidx_hpa].guest_phys_address_end;
+ ++regionidx_hpa;
+ mem_region_hpa[regionidx_hpa].host_phys_addr_offset =
+ next_phys_addr -
+ mem_region_hpa[regionidx_hpa].guest_phys_address;
+ LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest"
+ " phys addr start[%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].guest_phys_address));
+ LOG_DEBUG(VHOST_CONFIG,
+ "in fill_hpa_regions: host phys addr "
+ "start[%d]:(%p)\n",
+ regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].host_phys_addr_offset));
+ k = 0;
+ } else {
+ k += page_size;
+ }
+ }
+ mem_region_hpa[regionidx_hpa].guest_phys_address_end
+ = mem_region_hpa[regionidx_hpa].guest_phys_address
+ + k + page_size;
+ mem_region_hpa[regionidx_hpa].memory_size = k + page_size;
+ LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr end "
+ "[%d]:(%p)\n", regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].guest_phys_address_end));
+ LOG_DEBUG(VHOST_CONFIG, "in fill_hpa_regions: guest phys addr size "
+ "[%d]:(%p)\n", regionidx_hpa,
+ (void *)(uintptr_t)
+ (mem_region_hpa[regionidx_hpa].memory_size));
+ ++regionidx_hpa;
+ }
+ return regionidx_hpa;
+}
+
+/*
+ * A new device is added to a data core. First the device is added to the main linked list
+ * and then allocated to a specific data core.
+ */
+static int
+new_device (struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_dev;
+ int lcore, core_add = 0;
+ uint32_t device_num_min = num_devices;
+ struct vhost_dev *vdev;
+ uint32_t regionidx;
+
+ vdev = rte_zmalloc("vhost device", sizeof(*vdev), RTE_CACHE_LINE_SIZE);
+ if (vdev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Couldn't allocate memory for vhost dev\n",
+ dev->device_fh);
+ return -1;
+ }
+ vdev->dev = dev;
+ dev->priv = vdev;
+
+ if (zero_copy) {
+ vdev->nregions_hpa = dev->mem->nregions;
+ for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+ vdev->nregions_hpa
+ += check_hpa_regions(
+ dev->mem->regions[regionidx].guest_phys_address
+ + dev->mem->regions[regionidx].address_offset,
+ dev->mem->regions[regionidx].memory_size);
+
+ }
+
+ vdev->regions_hpa = rte_calloc("vhost hpa region",
+ vdev->nregions_hpa,
+ sizeof(struct virtio_memory_regions_hpa),
+ RTE_CACHE_LINE_SIZE);
+ if (vdev->regions_hpa == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Cannot allocate memory for hpa region\n");
+ rte_free(vdev);
+ return -1;
+ }
+
+
+ if (fill_hpa_memory_regions(
+ vdev->regions_hpa, dev->mem
+ ) != vdev->nregions_hpa) {
+
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "hpa memory regions number mismatch: "
+ "[%d]\n", vdev->nregions_hpa);
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+ }
+
+
+ /* Add device to main ll */
+ ll_dev = get_data_ll_free_entry(&ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
+ "of %d devices per core has been reached\n",
+ dev->device_fh, num_devices);
+ if (vdev->regions_hpa)
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+ ll_dev->vdev = vdev;
+ add_data_ll_entry(&ll_root_used, ll_dev);
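+ /*
+ * Each device owns one VMDQ pool; its RX queue index is the first queue
+ * of that pool, derived from the device handle, the number of queues per
+ * pool and the base queue of the VMDQ region.
+ */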
+ vdev->vmdq_rx_q
+ = dev->device_fh * queues_per_pool + vmdq_queue_base;
+
+ if (zero_copy) {
+ uint32_t index = vdev->vmdq_rx_q;
+ uint32_t count_in_ring, i;
+ struct mbuf_table *tx_q;
+
+ count_in_ring = rte_ring_count(vpool_array[index].ring);
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") in new_device: mbuf count in mempool "
+ "before attach is: %d\n",
+ dev->device_fh,
+ rte_mempool_count(vpool_array[index].pool));
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") in new_device: mbuf count in ring "
+ "before attach is : %d\n",
+ dev->device_fh, count_in_ring);
+
+ /*
+ * Attach all mbufs in vpool.ring and put them back into vpool.pool.
+ */
+ for (i = 0; i < count_in_ring; i++)
+ attach_rxmbuf_zcp(dev);
+
+ LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in "
+ "mempool after attach is: %d\n",
+ dev->device_fh,
+ rte_mempool_count(vpool_array[index].pool));
+ LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") in new_device: mbuf count in "
+ "ring after attach is : %d\n",
+ dev->device_fh,
+ rte_ring_count(vpool_array[index].ring));
+
+ tx_q = &tx_queue_zcp[(uint16_t)vdev->vmdq_rx_q];
+ tx_q->txq_id = vdev->vmdq_rx_q;
+
+ if (rte_eth_dev_tx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) {
+ struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") In new_device: Failed to start "
+ "tx queue:%d\n",
+ dev->device_fh, vdev->vmdq_rx_q);
+
+ mbuf_destroy_zcp(vpool);
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+
+ if (rte_eth_dev_rx_queue_start(ports[0], vdev->vmdq_rx_q) != 0) {
+ struct vpool *vpool = &vpool_array[vdev->vmdq_rx_q];
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") In new_device: Failed to start "
+ "rx queue:%d\n",
+ dev->device_fh, vdev->vmdq_rx_q);
+
+ /* Stop the TX queue. */
+ if (rte_eth_dev_tx_queue_stop(ports[0],
+ vdev->vmdq_rx_q) != 0) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "(%"PRIu64") In new_device: Failed to "
+ "stop tx queue:%d\n",
+ dev->device_fh, vdev->vmdq_rx_q);
+ }
+
+ mbuf_destroy_zcp(vpool);
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+
+ }
+
+ /* Reset the ready flag. */
+ vdev->ready = DEVICE_MAC_LEARNING;
+ vdev->remove = 0;
+
+ /* Find a suitable lcore to add the device. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
+ device_num_min = lcore_info[lcore].lcore_ll->device_num;
+ core_add = lcore;
+ }
+ }
+ /* Add device to lcore ll */
+ ll_dev = get_data_ll_free_entry(&lcore_info[core_add].lcore_ll->ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
+ vdev->ready = DEVICE_SAFE_REMOVE;
+ destroy_device(dev);
+ rte_free(vdev->regions_hpa);
+ rte_free(vdev);
+ return -1;
+ }
+ ll_dev->vdev = vdev;
+ vdev->coreid = core_add;
+
+ add_data_ll_entry(&lcore_info[vdev->coreid].lcore_ll->ll_root_used, ll_dev);
+
+ /* Initialize device stats */
+ memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));
+
+ /* Disable notifications. */
+ rte_vhost_enable_guest_notification(dev, VIRTIO_RXQ, 0);
+ rte_vhost_enable_guest_notification(dev, VIRTIO_TXQ, 0);
+ lcore_info[vdev->coreid].lcore_ll->device_num++;
+ dev->flags |= VIRTIO_DEV_RUNNING;
+
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, vdev->coreid);
+
+ return 0;
+}
+
+/*
+ * These callbacks allow devices to be added to the data core when
+ * configuration is fully complete.
+ */
+static const struct virtio_net_device_ops virtio_net_device_ops =
+{
+ .new_device = new_device,
+ .destroy_device = destroy_device,
+};
+
+/*
+ * This thread wakes up periodically to print statistics if the user has
+ * enabled them.
+ */
+static void
+print_stats(void)
+{
+ struct virtio_net_data_ll *dev_ll;
+ uint64_t tx_dropped, rx_dropped;
+ uint64_t tx, tx_total, rx, rx_total;
+ uint32_t device_fh;
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ while(1) {
+ sleep(enable_stats);
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, top_left);
+
+ printf("\nDevice statistics ====================================");
+
+ dev_ll = ll_root_used;
+ while (dev_ll != NULL) {
+ device_fh = (uint32_t)dev_ll->vdev->dev->device_fh;
+ tx_total = dev_statistics[device_fh].tx_total;
+ tx = dev_statistics[device_fh].tx;
+ tx_dropped = tx_total - tx;
+ if (zero_copy == 0) {
+ rx_total = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_total_atomic);
+ rx = rte_atomic64_read(
+ &dev_statistics[device_fh].rx_atomic);
+ } else {
+ rx_total = dev_statistics[device_fh].rx_total;
+ rx = dev_statistics[device_fh].rx;
+ }
+ rx_dropped = rx_total - rx;
+
+ printf("\nStatistics for device %"PRIu32" ------------------------------"
+ "\nTX total: %"PRIu64""
+ "\nTX dropped: %"PRIu64""
+ "\nTX successful: %"PRIu64""
+ "\nRX total: %"PRIu64""
+ "\nRX dropped: %"PRIu64""
+ "\nRX successful: %"PRIu64"",
+ device_fh,
+ tx_total,
+ tx_dropped,
+ tx,
+ rx_total,
+ rx_dropped,
+ rx);
+
+ dev_ll = dev_ll->next;
+ }
+ printf("\n======================================================\n");
+ }
+}
+
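+/*
+ * Create the mempool and companion ring used by the zero-copy path for one
+ * queue; the ring holds detached mbufs that are later attached to guest
+ * buffers.
+ */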
+static void
+setup_mempool_tbl(int socket, uint32_t index, char *pool_name,
+ char *ring_name, uint32_t nb_mbuf)
+{
+ vpool_array[index].pool = rte_pktmbuf_pool_create(pool_name, nb_mbuf,
+ MBUF_CACHE_SIZE_ZCP, 0, MBUF_DATA_SIZE_ZCP, socket);
+ if (vpool_array[index].pool != NULL) {
+ vpool_array[index].ring
+ = rte_ring_create(ring_name,
+ rte_align32pow2(nb_mbuf + 1),
+ socket, RING_F_SP_ENQ | RING_F_SC_DEQ);
+ if (likely(vpool_array[index].ring != NULL)) {
+ LOG_DEBUG(VHOST_CONFIG,
+ "in setup_mempool_tbl: mbuf count in "
+ "mempool is: %d\n",
+ rte_mempool_count(vpool_array[index].pool));
+ LOG_DEBUG(VHOST_CONFIG,
+ "in setup_mempool_tbl: mbuf count in "
+ "ring is: %d\n",
+ rte_ring_count(vpool_array[index].ring));
+ } else {
+ rte_exit(EXIT_FAILURE, "ring_create(%s) failed",
+ ring_name);
+ }
+
+ /* Need to take the headroom into account. */
+ vpool_array[index].buf_size = VIRTIO_DESCRIPTOR_LEN_ZCP;
+ } else {
+ rte_exit(EXIT_FAILURE, "mempool_create(%s) failed", pool_name);
+ }
+}
+
+/* When we receive an INT signal, unregister the vhost driver */
+static void
+sigint_handler(__rte_unused int signum)
+{
+ /* Unregister vhost driver. */
+ int ret = rte_vhost_driver_unregister((char *)&dev_basename);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "vhost driver unregister failure.\n");
+ exit(0);
+}
+
+/*
+ * Main function, does initialisation and calls the per-lcore functions. The CUSE
+ * device is also registered here to handle the IOCTLs.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool = NULL;
+ unsigned lcore_id, core_id = 0;
+ unsigned nb_ports, valid_num_ports;
+ int ret;
+ uint8_t portid;
+ uint16_t queue_id;
+ static pthread_t tid;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ signal(SIGINT, sigint_handler);
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse app arguments */
+ ret = us_vhost_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid argument\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)
+ if (rte_lcore_is_enabled(lcore_id))
+ lcore_ids[core_id ++] = lcore_id;
+
+ if (rte_lcore_count() > RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,"Not enough cores\n");
+
+ /* Set the number of switching cores available */
+ num_switching_cores = rte_lcore_count()-1;
+
+ /* Get the number of physical ports. */
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+ * Update the global var NUM_PORTS and the global array PORTS,
+ * and get the value of VALID_NUM_PORTS according to the system port count.
+ */
+ valid_num_ports = check_ports_num(nb_ports);
+
+ if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
+ RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u, "
+ "but only %u port can be enabled\n", num_ports, MAX_SUP_PORTS);
+ return -1;
+ }
+
+ if (zero_copy == 0) {
+ /* Create the mbuf pool. */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE,
+ 0, MBUF_DATA_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ for (queue_id = 0; queue_id < MAX_QUEUES + 1; queue_id++)
+ vpool_array[queue_id].pool = mbuf_pool;
+
+ if (vm2vm_mode == VM2VM_HARDWARE) {
+ /* Enable VT loop back to let L2 switch to do it. */
+ vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
+ LOG_DEBUG(VHOST_CONFIG,
+ "Enable loop back for L2 switch in vmdq.\n");
+ }
+ } else {
+ uint32_t nb_mbuf;
+ char pool_name[RTE_MEMPOOL_NAMESIZE];
+ char ring_name[RTE_MEMPOOL_NAMESIZE];
+
+ nb_mbuf = num_rx_descriptor
+ + num_switching_cores * MBUF_CACHE_SIZE_ZCP
+ + num_switching_cores * MAX_PKT_BURST;
+
+ for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {
+ snprintf(pool_name, sizeof(pool_name),
+ "rxmbuf_pool_%u", queue_id);
+ snprintf(ring_name, sizeof(ring_name),
+ "rxmbuf_ring_%u", queue_id);
+ setup_mempool_tbl(rte_socket_id(), queue_id,
+ pool_name, ring_name, nb_mbuf);
+ }
+
+ nb_mbuf = num_tx_descriptor
+ + num_switching_cores * MBUF_CACHE_SIZE_ZCP
+ + num_switching_cores * MAX_PKT_BURST;
+
+ for (queue_id = 0; queue_id < MAX_QUEUES; queue_id++) {
+ snprintf(pool_name, sizeof(pool_name),
+ "txmbuf_pool_%u", queue_id);
+ snprintf(ring_name, sizeof(ring_name),
+ "txmbuf_ring_%u", queue_id);
+ setup_mempool_tbl(rte_socket_id(),
+ (queue_id + MAX_QUEUES),
+ pool_name, ring_name, nb_mbuf);
+ }
+
+ if (vm2vm_mode == VM2VM_HARDWARE) {
+ /* Enable VT loop back to let L2 switch to do it. */
+ vmdq_conf_default.rx_adv_conf.vmdq_rx_conf.enable_loop_back = 1;
+ LOG_DEBUG(VHOST_CONFIG,
+ "Enable loop back for L2 switch in vmdq.\n");
+ }
+ }
+ /* Set log level. */
+ rte_set_log_level(LOG_LEVEL);
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ RTE_LOG(INFO, VHOST_PORT,
+ "Skipping disabled port %d\n", portid);
+ continue;
+ }
+ if (port_init(portid) != 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot initialize network ports\n");
+ }
+
+ /* Initialise all linked lists. */
+ if (init_data_ll() == -1)
+ rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");
+
+ /* Initialize device stats */
+ memset(&dev_statistics, 0, sizeof(dev_statistics));
+
+ /* Enable stats if the user option is set. */
+ if (enable_stats) {
+ ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot create print-stats thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-stats");
+ ret = rte_thread_setname(tid, thread_name);
+ if (ret != 0)
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Cannot set print-stats name\n");
+ }
+
+ /* Launch all data cores. */
+ if (zero_copy == 0) {
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(switch_worker,
+ mbuf_pool, lcore_id);
+ }
+ } else {
+ uint32_t count_in_mempool, index, i;
+ for (index = 0; index < 2*MAX_QUEUES; index++) {
+ /* For all RX and TX queues. */
+ count_in_mempool
+ = rte_mempool_count(vpool_array[index].pool);
+
+ /*
+ * Transfer all un-attached mbufs from vpool.pool
+ * to vpool.ring.
+ */
+ for (i = 0; i < count_in_mempool; i++) {
+ struct rte_mbuf *mbuf
+ = __rte_mbuf_raw_alloc(
+ vpool_array[index].pool);
+ rte_ring_sp_enqueue(vpool_array[index].ring,
+ (void *)mbuf);
+ }
+
+ LOG_DEBUG(VHOST_CONFIG,
+ "in main: mbuf count in mempool at initial "
+ "is: %d\n", count_in_mempool);
+ LOG_DEBUG(VHOST_CONFIG,
+ "in main: mbuf count in ring at initial is :"
+ " %d\n",
+ rte_ring_count(vpool_array[index].ring));
+ }
+
+ RTE_LCORE_FOREACH_SLAVE(lcore_id)
+ rte_eal_remote_launch(switch_worker_zcp, NULL,
+ lcore_id);
+ }
+
+ if (mergeable == 0)
+ rte_vhost_feature_disable(1ULL << VIRTIO_NET_F_MRG_RXBUF);
+
+ /* Register vhost(cuse or user) driver to handle vhost messages. */
+ ret = rte_vhost_driver_register((char *)&dev_basename);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE, "vhost driver register failure.\n");
+
+ rte_vhost_driver_callback_register(&virtio_net_device_ops);
+
+ /* Start CUSE session. */
+ rte_vhost_driver_session_start();
+ return 0;
+
+}
diff --git a/examples/vhost/main.h b/examples/vhost/main.h
new file mode 100644
index 00000000..d04e2be2
--- /dev/null
+++ b/examples/vhost/main.h
@@ -0,0 +1,115 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+//#define DEBUG
+
+#ifdef DEBUG
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) do { \
+ RTE_LOG(DEBUG, log_type, fmt, ##args); \
+} while (0)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do {} while (0)
+#endif
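+
+/*
+ * For example, LOG_DEBUG(VHOST_CONFIG, "port %u ready\n", portid) expands to
+ * RTE_LOG(DEBUG, VHOST_CONFIG, ...) when DEBUG is defined above and to a
+ * no-op otherwise.
+ */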
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2
+#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3
+
+/**
+ * Information relating to memory regions including offsets to
+ * addresses in host physical space.
+ */
+struct virtio_memory_regions_hpa {
+ /**< Base guest physical address of region. */
+ uint64_t guest_phys_address;
+ /**< End guest physical address of region. */
+ uint64_t guest_phys_address_end;
+ /**< Size of region. */
+ uint64_t memory_size;
+ /**< Offset of region for gpa to hpa translation. */
+ uint64_t host_phys_addr_offset;
+};
+
+/*
+ * Device linked list structure for data path.
+ */
+struct vhost_dev {
+ /**< Pointer to device created by vhost lib. */
+ struct virtio_net *dev;
+ /**< Number of memory regions for gpa to hpa translation. */
+ uint32_t nregions_hpa;
+ /**< Memory region information for gpa to hpa translation. */
+ struct virtio_memory_regions_hpa *regions_hpa;
+ /**< Device MAC address (Obtained on first TX packet). */
+ struct ether_addr mac_address;
+ /**< RX VMDQ queue number. */
+ uint16_t vmdq_rx_q;
+ /**< Vlan tag assigned to the pool */
+ uint32_t vlan_tag;
+ /**< Data core that the device is added to. */
+ uint16_t coreid;
+ /**< A device is set as ready if the MAC address has been set. */
+ volatile uint8_t ready;
+ /**< Device is marked for removal from the data core. */
+ volatile uint8_t remove;
+} __rte_cache_aligned;
+
+struct virtio_net_data_ll
+{
+ struct vhost_dev *vdev; /* Pointer to device created by configuration core. */
+ struct virtio_net_data_ll *next; /* Pointer to next device in linked list. */
+};
+
+/*
+ * Structure containing data core specific information.
+ */
+struct lcore_ll_info
+{
+ struct virtio_net_data_ll *ll_root_free; /* Pointer to head in free linked list. */
+ struct virtio_net_data_ll *ll_root_used; /* Pointer to head of used linked list. */
+ uint32_t device_num; /* Number of devices on lcore. */
+ volatile uint8_t dev_removal_flag; /* Flag to synchronize device removal. */
+};
+
+struct lcore_info
+{
+ struct lcore_ll_info *lcore_ll; /* Pointer to data core specific lcore_ll_info struct */
+};
+
+#endif /* _MAIN_H_ */
diff --git a/examples/vhost_xen/Makefile b/examples/vhost_xen/Makefile
new file mode 100644
index 00000000..47e14898
--- /dev/null
+++ b/examples/vhost_xen/Makefile
@@ -0,0 +1,52 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vhost-switch
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vhost_monitor.c xenstore_parse.c
+
+CFLAGS += -O2 -I/usr/local/include -D_FILE_OFFSET_BITS=64 -Wno-unused-parameter
+CFLAGS += $(WERROR_FLAGS)
+CFLAGS += -D_GNU_SOURCE
+LDFLAGS += -lxenstore
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vhost_xen/main.c b/examples/vhost_xen/main.c
new file mode 100644
index 00000000..d83138d6
--- /dev/null
+++ b/examples/vhost_xen/main.c
@@ -0,0 +1,1530 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <arpa/inet.h>
+#include <getopt.h>
+#include <linux/if_ether.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <linux/virtio_ring.h>
+#include <signal.h>
+#include <stdint.h>
+#include <sys/eventfd.h>
+#include <sys/param.h>
+#include <unistd.h>
+
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_string_fns.h>
+
+#include "main.h"
+#include "virtio-net.h"
+#include "xen_vhost.h"
+
+#define MAX_QUEUES 128
+
+/* the maximum number of external ports supported */
+#define MAX_SUP_PORTS 1
+
+/*
+ * Calculate the number of buffers needed per port
+ */
+#define NUM_MBUFS_PER_PORT ((MAX_QUEUES*RTE_TEST_RX_DESC_DEFAULT) + \
+ (num_switching_cores*MAX_PKT_BURST) + \
+ (num_switching_cores*RTE_TEST_TX_DESC_DEFAULT) +\
+ (num_switching_cores*MBUF_CACHE_SIZE))
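+/*
+ * That is: enough mbufs to fill every RX ring, plus per-core RX/TX bursts,
+ * in-flight TX descriptors and mempool cache entries.
+ */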
+
+#define MBUF_CACHE_SIZE 64
+
+/*
+ * RX and TX Prefetch, Host, and Write-back threshold values should be
+ * carefully set for optimal performance. Consult the network
+ * controller's datasheet and supporting DPDK documentation for guidance
+ * on how these parameters should be set.
+ */
+#define RX_PTHRESH 8 /* Default values of RX prefetch threshold reg. */
+#define RX_HTHRESH 8 /* Default values of RX host threshold reg. */
+#define RX_WTHRESH 4 /* Default values of RX write-back threshold reg. */
+
+/*
+ * These default values are optimized for use with the Intel(R) 82599 10 GbE
+ * Controller and the DPDK ixgbe PMD. Consider using other values for other
+ * network controllers and/or network drivers.
+ */
+#define TX_PTHRESH 36 /* Default values of TX prefetch threshold reg. */
+#define TX_HTHRESH 0 /* Default values of TX host threshold reg. */
+#define TX_WTHRESH 0 /* Default values of TX write-back threshold reg. */
+
+#define MAX_PKT_BURST 32 /* Max burst size for RX/TX */
+#define MAX_MRG_PKT_BURST 16 /* Max burst for merge buffers. Set to 1 due to performance issue. */
+#define BURST_TX_DRAIN_US 100 /* TX drain every ~100us */
+
+/* State of virtio device. */
+#define DEVICE_NOT_READY 0
+#define DEVICE_READY 1
+#define DEVICE_SAFE_REMOVE 2
+
+/* Config_core_flag status definitions. */
+#define REQUEST_DEV_REMOVAL 1
+#define ACK_DEV_REMOVAL 0
+
+/* Configurable number of RX/TX ring descriptors */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+#define INVALID_PORT_ID 0xFF
+
+/* Max number of devices. Limited by vmdq. */
+#define MAX_DEVICES 64
+
+/* Size of buffers used for snprintfs. */
+#define MAX_PRINT_BUFF 6072
+
+
+/* Maximum long option length for option parsing. */
+#define MAX_LONG_OPT_SZ 64
+
+/* Used to compare MAC addresses. */
+#define MAC_ADDR_CMP 0xFFFFFFFFFFFF
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask = 0;
+
+/*Number of switching cores enabled*/
+static uint32_t num_switching_cores = 0;
+
+/* number of devices/queues to support*/
+static uint32_t num_queues = 0;
+uint32_t num_devices = 0;
+
+/* Enable VM2VM communications. If this is disabled then the MAC address compare is skipped. */
+static uint32_t enable_vm2vm = 1;
+/* Enable stats. */
+static uint32_t enable_stats = 0;
+
+/* Empty vmdq configuration structure. Filled in programmatically */
+static const struct rte_eth_conf vmdq_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ /*
+ * This is necessary for 1G NICs such as the I350; it fixes a
+ * bug where IPv4 forwarding in the guest cannot forward
+ * packets from one virtio dev to another virtio dev.
+ */
+ .hw_vlan_strip = 1, /**< VLAN strip enabled. */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ .hw_strip_crc = 0, /**< CRC stripped by hardware */
+ },
+
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .rx_adv_conf = {
+ /*
+ * should be overridden separately in code with
+ * appropriate values
+ */
+ .vmdq_rx_conf = {
+ .nb_queue_pools = ETH_8_POOLS,
+ .enable_default_pool = 0,
+ .default_pool = 0,
+ .nb_pool_maps = 0,
+ .pool_map = {{0, 0},},
+ },
+ },
+};
+
+static unsigned lcore_ids[RTE_MAX_LCORE];
+static uint8_t ports[RTE_MAX_ETHPORTS];
+static unsigned num_ports = 0; /**< The number of ports specified in command line */
+
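+/* One VLAN tag per possible guest device/VMDQ pool (MAX_DEVICES entries). */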
+const uint16_t vlan_tags[] = {
+ 1000, 1001, 1002, 1003, 1004, 1005, 1006, 1007,
+ 1008, 1009, 1010, 1011, 1012, 1013, 1014, 1015,
+ 1016, 1017, 1018, 1019, 1020, 1021, 1022, 1023,
+ 1024, 1025, 1026, 1027, 1028, 1029, 1030, 1031,
+ 1032, 1033, 1034, 1035, 1036, 1037, 1038, 1039,
+ 1040, 1041, 1042, 1043, 1044, 1045, 1046, 1047,
+ 1048, 1049, 1050, 1051, 1052, 1053, 1054, 1055,
+ 1056, 1057, 1058, 1059, 1060, 1061, 1062, 1063,
+};
+
+/* ethernet addresses of ports */
+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* heads for the main used and free linked lists for the data path. */
+static struct virtio_net_data_ll *ll_root_used = NULL;
+static struct virtio_net_data_ll *ll_root_free = NULL;
+
+/* Array of data core structures containing information on individual core linked lists. */
+static struct lcore_info lcore_info[RTE_MAX_LCORE];
+
+/* Used for queueing bursts of TX packets. */
+struct mbuf_table {
+ unsigned len;
+ unsigned txq_id;
+ struct rte_mbuf *m_table[MAX_PKT_BURST];
+};
+
+/* TX queue for each data core. */
+struct mbuf_table lcore_tx_queue[RTE_MAX_LCORE];
+
+/* Vlan header struct used to insert vlan tags on TX. */
+struct vlan_ethhdr {
+ unsigned char h_dest[ETH_ALEN];
+ unsigned char h_source[ETH_ALEN];
+ __be16 h_vlan_proto;
+ __be16 h_vlan_TCI;
+ __be16 h_vlan_encapsulated_proto;
+};
+
+/* Header lengths. */
+#define VLAN_HLEN 4
+#define VLAN_ETH_HLEN 18
+
+/* Per-device statistics struct */
+struct device_statistics {
+ uint64_t tx_total;
+ rte_atomic64_t rx_total;
+ uint64_t tx;
+ rte_atomic64_t rx;
+} __rte_cache_aligned;
+struct device_statistics dev_statistics[MAX_DEVICES];
+
+/*
+ * Builds up the correct configuration for VMDQ VLAN pool map
+ * according to the pool & queue limits.
+ */
+static inline int
+get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_devices)
+{
+ struct rte_eth_vmdq_rx_conf conf;
+ unsigned i;
+
+ memset(&conf, 0, sizeof(conf));
+ conf.nb_queue_pools = (enum rte_eth_nb_pools)num_devices;
+ conf.nb_pool_maps = num_devices;
+
+ for (i = 0; i < conf.nb_pool_maps; i++) {
+ conf.pool_map[i].vlan_id = vlan_tags[ i ];
+ conf.pool_map[i].pools = (1UL << i);
+ }
+
+ (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
+ sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
+ return 0;
+}
+
+/*
+ * Validate the device number according to the max pool number obtained from dev_info.
+ * If the device number is invalid, print an error message and return -1.
+ * Each device must have its own pool.
+ */
+static inline int
+validate_num_devices(uint32_t max_nb_devices)
+{
+ if (num_devices > max_nb_devices) {
+ RTE_LOG(ERR, VHOST_PORT, "invalid number of devices\n");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_rxconf *rxconf;
+ struct rte_eth_conf port_conf;
+ uint16_t rx_rings, tx_rings = (uint16_t)rte_lcore_count();
+ const uint16_t rx_ring_size = RTE_TEST_RX_DESC_DEFAULT, tx_ring_size = RTE_TEST_TX_DESC_DEFAULT;
+ int retval;
+ uint16_t q;
+
+ /* The max pool number from dev_info will be used to validate the pool number specified in cmd line */
+ rte_eth_dev_info_get (port, &dev_info);
+
+ /* Configure the number of supported virtio devices based on VMDQ limits */
+ num_devices = dev_info.max_vmdq_pools;
+ num_queues = dev_info.max_rx_queues;
+
+ retval = validate_num_devices(MAX_DEVICES);
+ if (retval < 0)
+ return retval;
+
+ /* Get port configuration. */
+ retval = get_eth_conf(&port_conf, num_devices);
+ if (retval < 0)
+ return retval;
+
+ if (port >= rte_eth_dev_count()) return -1;
+
+ rx_rings = (uint16_t)num_queues;
+ /* Configure ethernet device. */
+ retval = rte_eth_dev_configure(port, rx_rings, tx_rings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ rte_eth_dev_info_get(port, &dev_info);
+ rxconf = &dev_info.default_rxconf;
+ rxconf->rx_drop_en = 1;
+ /* Setup the queues. */
+ for (q = 0; q < rx_rings; q ++) {
+ retval = rte_eth_rx_queue_setup(port, q, rx_ring_size,
+ rte_eth_dev_socket_id(port), rxconf,
+ mbuf_pool);
+ if (retval < 0)
+ return retval;
+ }
+ for (q = 0; q < tx_rings; q ++) {
+ retval = rte_eth_tx_queue_setup(port, q, tx_ring_size,
+ rte_eth_dev_socket_id(port),
+ NULL);
+ if (retval < 0)
+ return retval;
+ }
+
+ /* Start the device. */
+ retval = rte_eth_dev_start(port);
+ if (retval < 0)
+ return retval;
+
+ rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+ RTE_LOG(INFO, VHOST_PORT, "Max virtio devices supported: %u\n", num_devices);
+ RTE_LOG(INFO, VHOST_PORT, "Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ vmdq_ports_eth_addr[port].addr_bytes[0],
+ vmdq_ports_eth_addr[port].addr_bytes[1],
+ vmdq_ports_eth_addr[port].addr_bytes[2],
+ vmdq_ports_eth_addr[port].addr_bytes[3],
+ vmdq_ports_eth_addr[port].addr_bytes[4],
+ vmdq_ports_eth_addr[port].addr_bytes[5]);
+
+ return 0;
+}
+
+/*
+ * Parse the portmask provided at run time.
+ */
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ errno = 0;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+
+}
+
+/*
+ * Parse num options at run time.
+ */
+static int
+parse_num_opt(const char *q_arg, uint32_t max_valid_value)
+{
+ char *end = NULL;
+ unsigned long num;
+
+ errno = 0;
+
+ /* parse unsigned int string */
+ num = strtoul(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0') || (errno != 0))
+ return -1;
+
+ if (num > max_valid_value)
+ return -1;
+
+ return num;
+
+}
+
+/*
+ * Display usage
+ */
+static void
+us_vhost_usage(const char *prgname)
+{
+ RTE_LOG(INFO, VHOST_CONFIG, "%s [EAL options] -- -p PORTMASK --vm2vm [0|1] --stats [0-N] --nb-devices ND\n"
+ " -p PORTMASK: Set mask for ports to be used by application\n"
+ " --vm2vm [0|1]: disable/enable(default) vm2vm comms\n"
+ " --stats [0-N]: 0: Disable stats, N: Time in seconds to print stats\n",
+ prgname);
+}
+
+/*
+ * Parse the arguments given in the command line of the application.
+ */
+static int
+us_vhost_parse_args(int argc, char **argv)
+{
+ int opt, ret;
+ int option_index;
+ unsigned i;
+ const char *prgname = argv[0];
+ static struct option long_option[] = {
+ {"vm2vm", required_argument, NULL, 0},
+ {"stats", required_argument, NULL, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:",long_option, &option_index)) != EOF) {
+ switch (opt) {
+ /* Portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid portmask\n");
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ break;
+
+ case 0:
+ /* Enable/disable vm2vm comms. */
+ if (!strncmp(long_option[option_index].name, "vm2vm", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, 1);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for vm2vm [0|1]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ enable_vm2vm = ret;
+ }
+ }
+
+ /* Enable/disable stats. */
+ if (!strncmp(long_option[option_index].name, "stats", MAX_LONG_OPT_SZ)) {
+ ret = parse_num_opt(optarg, INT32_MAX);
+ if (ret == -1) {
+ RTE_LOG(INFO, VHOST_CONFIG, "Invalid argument for stats [0..N]\n");
+ us_vhost_usage(prgname);
+ return -1;
+ } else {
+ enable_stats = ret;
+ }
+ }
+ break;
+
+ /* Invalid option - print options. */
+ default:
+ us_vhost_usage(prgname);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (enabled_port_mask & (1 << i))
+ ports[num_ports++] = (uint8_t)i;
+ }
+
+ if ((num_ports == 0) || (num_ports > MAX_SUP_PORTS)) {
+ RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
+ "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Update the global variable num_ports and the ports array according to the
+ * number of ports in the system, and return the number of valid ports.
+ */
+static unsigned check_ports_num(unsigned nb_ports)
+{
+ unsigned valid_num_ports = num_ports;
+ unsigned portid;
+
+ if (num_ports > nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT, "\nSpecified port number(%u) exceeds total system port number(%u)\n",
+ num_ports, nb_ports);
+ num_ports = nb_ports;
+ }
+
+ for (portid = 0; portid < num_ports; portid ++) {
+ if (ports[portid] >= nb_ports) {
+ RTE_LOG(INFO, VHOST_PORT, "\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
+ ports[portid], (nb_ports - 1));
+ ports[portid] = INVALID_PORT_ID;
+ valid_num_ports--;
+ }
+ }
+ return valid_num_ports;
+}
+
+/*
+ * Macro to print out packet contents. Wrapped in debug define so that the
+ * data path is not affected when debug is disabled.
+ */
+#ifdef DEBUG
+#define PRINT_PACKET(device, addr, size, header) do { \
+ char *pkt_addr = (char*)(addr); \
+ unsigned int index; \
+ char packet[MAX_PRINT_BUFF]; \
+ \
+ if ((header)) \
+ snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Header size %d: ", (device->device_fh), (size)); \
+ else \
+ snprintf(packet, MAX_PRINT_BUFF, "(%"PRIu64") Packet size %d: ", (device->device_fh), (size)); \
+ for (index = 0; index < (size); index++) { \
+ snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), \
+ "%02hhx ", pkt_addr[index]); \
+ } \
+ snprintf(packet + strnlen(packet, MAX_PRINT_BUFF), MAX_PRINT_BUFF - strnlen(packet, MAX_PRINT_BUFF), "\n"); \
+ \
+ LOG_DEBUG(VHOST_DATA, "%s", packet); \
+} while(0)
+#else
+#define PRINT_PACKET(device, addr, size, header) do{} while(0)
+#endif
+
+/*
+ * Function to convert guest physical addresses to vhost virtual addresses. This
+ * is used to convert virtio buffer addresses.
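+ * The device's memory regions are searched linearly; a guest address that falls
+ * outside every region translates to 0.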
+ */
+static inline uint64_t __attribute__((always_inline))
+gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa)
+{
+ struct virtio_memory_regions *region;
+ uint32_t regionidx;
+ uint64_t vhost_va = 0;
+
+ for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) {
+ region = &dev->mem->regions[regionidx];
+ if ((guest_pa >= region->guest_phys_address) &&
+ (guest_pa <= region->guest_phys_address_end)) {
+ vhost_va = region->address_offset + guest_pa;
+ break;
+ }
+ }
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") GPA %p| VVA %p\n",
+ dev->device_fh, (void*)(uintptr_t)guest_pa, (void*)(uintptr_t)vhost_va);
+
+ return vhost_va;
+}
+
+/*
+ * This function adds buffers to the virtio devices RX virtqueue. Buffers can
+ * be received from the physical port or from another virtio device. A packet
+ * count is returned to indicate the number of packets that were successfully
+ * added to the RX queue.
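+ * Entries in the available ring are reserved with an atomic compare-and-set on
+ * last_used_idx_res so that multiple data cores can safely enqueue to the same
+ * virtqueue.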
+ */
+static inline uint32_t __attribute__((always_inline))
+virtio_dev_rx(struct virtio_net *dev, struct rte_mbuf **pkts, uint32_t count)
+{
+ struct vhost_virtqueue *vq;
+ struct vring_desc *desc;
+ struct rte_mbuf *buff;
+ /* The virtio_hdr is initialised to 0. */
+ struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0,0,0,0,0,0},0};
+ uint64_t buff_addr = 0;
+ uint64_t buff_hdr_addr = 0;
+ uint32_t head[MAX_PKT_BURST], packet_len = 0;
+ uint32_t head_idx, packet_success = 0;
+ uint16_t avail_idx, res_cur_idx;
+ uint16_t res_base_idx, res_end_idx;
+ uint16_t free_entries;
+ uint8_t success = 0;
+ void *userdata;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh);
+ vq = dev->virtqueue_rx;
+ count = (count > MAX_PKT_BURST) ? MAX_PKT_BURST : count;
+ /* As many data cores may want access to available buffers, they need to be reserved. */
+ do {
+
+ res_base_idx = vq->last_used_idx_res;
+
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+
+ free_entries = (avail_idx - res_base_idx);
+
+ /*check that we have enough buffers*/
+ if (unlikely(count > free_entries))
+ count = free_entries;
+
+ if (count == 0)
+ return 0;
+
+ res_end_idx = res_base_idx + count;
+ /* vq->last_used_idx_res is atomically updated. */
+ success = rte_atomic16_cmpset(&vq->last_used_idx_res, res_base_idx,
+ res_end_idx);
+ } while (unlikely(success == 0));
+ res_cur_idx = res_base_idx;
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Current Index %d| End Index %d\n", dev->device_fh, res_cur_idx, res_end_idx);
+
+ /* Prefetch available ring to retrieve indexes. */
+ rte_prefetch0(&vq->avail->ring[res_cur_idx & (vq->size - 1)]);
+
+ /* Retrieve all of the head indexes first to avoid caching issues. */
+ for (head_idx = 0; head_idx < count; head_idx++)
+ head[head_idx] = vq->avail->ring[(res_cur_idx + head_idx) & (vq->size - 1)];
+
+ /*Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+
+ while (res_cur_idx != res_end_idx) {
+ /* Get descriptor from available ring */
+ desc = &vq->desc[head[packet_success]];
+ /* Prefetch descriptor address. */
+ rte_prefetch0(desc);
+
+ buff = pkts[packet_success];
+
+ /* Convert from gpa to vva (guest physical addr -> vhost virtual addr) */
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ /* Prefetch buffer address. */
+ rte_prefetch0((void*)(uintptr_t)buff_addr);
+
+ {
+ /* Copy virtio_hdr to packet and increment buffer address */
+ buff_hdr_addr = buff_addr;
+ packet_len = rte_pktmbuf_data_len(buff) + vq->vhost_hlen;
+
+ /*
+ * If the descriptors are chained the header and data are placed in
+ * separate buffers.
+ */
+ if (desc->flags & VRING_DESC_F_NEXT) {
+ desc->len = vq->vhost_hlen;
+ desc = &vq->desc[desc->next];
+ /* Buffer address translation. */
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ desc->len = rte_pktmbuf_data_len(buff);
+ } else {
+ buff_addr += vq->vhost_hlen;
+ desc->len = packet_len;
+ }
+ }
+
+ /* Update used ring with desc information */
+ vq->used->ring[res_cur_idx & (vq->size - 1)].id = head[packet_success];
+ vq->used->ring[res_cur_idx & (vq->size - 1)].len = packet_len;
+
+ /* Copy mbuf data to buffer */
+ userdata = rte_pktmbuf_mtod(buff, void *);
+ rte_memcpy((void *)(uintptr_t)buff_addr, userdata, rte_pktmbuf_data_len(buff));
+
+ res_cur_idx++;
+ packet_success++;
+
+ /* Mergeable buffers are disabled, so a header is required for every buffer. */
+ rte_memcpy((void *)(uintptr_t)buff_hdr_addr, (const void *)&virtio_hdr, vq->vhost_hlen);
+ if (res_cur_idx < res_end_idx) {
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+ }
+ }
+
+ rte_compiler_barrier();
+
+ /* Wait until it's our turn to add our buffer to the used ring. */
+ while (unlikely(vq->last_used_idx != res_base_idx))
+ rte_pause();
+
+ *(volatile uint16_t *)&vq->used->idx += count;
+
+ vq->last_used_idx = res_end_idx;
+
+ return count;
+}
+
+/*
+ * Compares a packet destination MAC address to a device MAC address.
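+ * Both addresses are read as 64-bit words; MAC_ADDR_CMP masks the comparison
+ * down to the MAC address bytes.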
+ */
+static inline int __attribute__((always_inline))
+ether_addr_cmp(struct ether_addr *ea, struct ether_addr *eb)
+{
+ return ((*(uint64_t *)ea ^ *(uint64_t *)eb) & MAC_ADDR_CMP) == 0;
+}
+
+/*
+ * This function registers mac along with a
+ * vlan tag to a VMDQ.
+ */
+static int
+link_vmdq(struct virtio_net *dev)
+{
+ int ret;
+ struct virtio_net_data_ll *dev_ll;
+
+ dev_ll = ll_root_used;
+
+ while (dev_ll != NULL) {
+ if ((dev != dev_ll->dev) && ether_addr_cmp(&dev->mac_address, &dev_ll->dev->mac_address)) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") WARNING: This device is using an existing MAC address and has not been registered.\n", dev->device_fh);
+ return -1;
+ }
+ dev_ll = dev_ll->next;
+ }
+
+ /* vlan_tag currently uses the device_id. */
+ dev->vlan_tag = vlan_tags[dev->device_fh];
+ dev->vmdq_rx_q = dev->device_fh * (num_queues/num_devices);
+
+ /* Print out VMDQ registration info. */
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") MAC_ADDRESS %02x:%02x:%02x:%02x:%02x:%02x and VLAN_TAG %d registered\n",
+ dev->device_fh,
+ dev->mac_address.addr_bytes[0], dev->mac_address.addr_bytes[1],
+ dev->mac_address.addr_bytes[2], dev->mac_address.addr_bytes[3],
+ dev->mac_address.addr_bytes[4], dev->mac_address.addr_bytes[5],
+ dev->vlan_tag);
+
+ /* Register the MAC address. */
+ ret = rte_eth_dev_mac_addr_add(ports[0], &dev->mac_address, (uint32_t)dev->device_fh);
+ if (ret) {
+ RTE_LOG(ERR, VHOST_DATA, "(%"PRIu64") Failed to add device MAC address to VMDQ\n",
+ dev->device_fh);
+ return -1;
+ }
+
+ /* Enable stripping of the vlan tag as we handle routing. */
+ rte_eth_dev_set_vlan_strip_on_queue(ports[0], dev->vmdq_rx_q, 1);
+
+ rte_compiler_barrier();
+ /* Set device as ready for RX. */
+ dev->ready = DEVICE_READY;
+
+ return 0;
+}
+
+/*
+ * Removes MAC address and vlan tag from VMDQ. Ensures that nothing is adding buffers to the RX
+ * queue before disabling RX on the device.
+ */
+static inline void
+unlink_vmdq(struct virtio_net *dev)
+{
+ unsigned i = 0;
+ unsigned rx_count;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+
+ if (dev->ready == DEVICE_READY) {
+ /*clear MAC and VLAN settings*/
+ rte_eth_dev_mac_addr_remove(ports[0], &dev->mac_address);
+ for (i = 0; i < 6; i++)
+ dev->mac_address.addr_bytes[i] = 0;
+
+ dev->vlan_tag = 0;
+
+ /*Clear out the receive buffers*/
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+
+ while (rx_count) {
+ for (i = 0; i < rx_count; i++)
+ rte_pktmbuf_free(pkts_burst[i]);
+
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+ }
+
+ dev->ready = DEVICE_NOT_READY;
+ }
+}
+
+/*
+ * Check if the packet destination MAC address is for a local device. If so then put
+ * the packet on that device's RX queue. If not, return.
+ */
+static inline unsigned __attribute__((always_inline))
+virtio_tx_local(struct virtio_net *dev, struct rte_mbuf *m)
+{
+ struct virtio_net_data_ll *dev_ll;
+ struct ether_hdr *pkt_hdr;
+ uint64_t ret = 0;
+
+ pkt_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /*get the used devices list*/
+ dev_ll = ll_root_used;
+
+ while (dev_ll != NULL) {
+ if (likely(dev_ll->dev->ready == DEVICE_READY) && ether_addr_cmp(&(pkt_hdr->d_addr),
+ &dev_ll->dev->mac_address)) {
+
+ /* Drop the packet if the TX packet is destined for the TX device. */
+ if (dev_ll->dev->device_fh == dev->device_fh) {
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: Source and destination MAC addresses are the same. Dropping packet.\n",
+ dev_ll->dev->device_fh);
+ return 0;
+ }
+
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is local\n", dev_ll->dev->device_fh);
+
+ if (dev_ll->dev->remove) {
+ /*drop the packet if the device is marked for removal*/
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Device is marked for removal\n", dev_ll->dev->device_fh);
+ } else {
+ /*send the packet to the local virtio device*/
+ ret = virtio_dev_rx(dev_ll->dev, &m, 1);
+ if (enable_stats) {
+ rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, 1);
+ rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret);
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx += ret;
+ }
+ }
+
+ return 0;
+ }
+ dev_ll = dev_ll->next;
+ }
+
+ return -1;
+}
+
+/*
+ * This function routes the TX packet to the correct interface. This may be a local device
+ * or the physical port.
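+ * When the packet leaves via the physical port, a fresh mbuf is allocated so a
+ * VLAN header carrying the device's tag can be inserted ahead of the payload.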
+ */
+static inline void __attribute__((always_inline))
+virtio_tx_route(struct virtio_net* dev, struct rte_mbuf *m, struct rte_mempool *mbuf_pool, uint16_t vlan_tag)
+{
+ struct mbuf_table *tx_q;
+ struct vlan_ethhdr *vlan_hdr;
+ struct rte_mbuf **m_table;
+ struct rte_mbuf *mbuf;
+ unsigned len, ret;
+ const uint16_t lcore_id = rte_lcore_id();
+
+ /*check if destination is local VM*/
+ if (enable_vm2vm && (virtio_tx_local(dev, m) == 0)) {
+ return;
+ }
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") TX: MAC address is external\n", dev->device_fh);
+
+ /*Add packet to the port tx queue*/
+ tx_q = &lcore_tx_queue[lcore_id];
+ len = tx_q->len;
+
+ /* Allocate an mbuf and populate the structure. */
+ mbuf = rte_pktmbuf_alloc(mbuf_pool);
+ if(!mbuf)
+ return;
+
+ mbuf->data_len = m->data_len + VLAN_HLEN;
+ mbuf->pkt_len = mbuf->data_len;
+
+ /* Copy ethernet header to mbuf. */
+ rte_memcpy(rte_pktmbuf_mtod(mbuf, void*),
+ rte_pktmbuf_mtod(m, const void*), ETH_HLEN);
+
+
+ /* Set up the VLAN header. Fields are converted to network byte order with htons(). */
+ vlan_hdr = rte_pktmbuf_mtod(mbuf, struct vlan_ethhdr *);
+ vlan_hdr->h_vlan_encapsulated_proto = vlan_hdr->h_vlan_proto;
+ vlan_hdr->h_vlan_proto = htons(ETH_P_8021Q);
+ vlan_hdr->h_vlan_TCI = htons(vlan_tag);
+
+ /* Copy the remaining packet contents to the mbuf. */
+ rte_memcpy(rte_pktmbuf_mtod_offset(mbuf, void *, VLAN_ETH_HLEN),
+ rte_pktmbuf_mtod_offset(m, const void *, ETH_HLEN),
+ (m->data_len - ETH_HLEN));
+ tx_q->m_table[len] = mbuf;
+ len++;
+ if (enable_stats) {
+ dev_statistics[dev->device_fh].tx_total++;
+ dev_statistics[dev->device_fh].tx++;
+ }
+
+ if (unlikely(len == MAX_PKT_BURST)) {
+ m_table = (struct rte_mbuf **)tx_q->m_table;
+ ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id, m_table, (uint16_t) len);
+ /* Free any buffers not handled by TX and update the port stats. */
+ if (unlikely(ret < len)) {
+ do {
+ rte_pktmbuf_free(m_table[ret]);
+ } while (++ret < len);
+ }
+
+ len = 0;
+ }
+
+ tx_q->len = len;
+ return;
+}
+
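+/*
+ * Dequeues buffers from the guest TX virtqueue. Each buffer is wrapped in a
+ * dummy stack-allocated mbuf and routed with virtio_tx_route(); the used ring
+ * index is updated once the burst has been processed.
+ */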
+static inline void __attribute__((always_inline))
+virtio_dev_tx(struct virtio_net* dev, struct rte_mempool *mbuf_pool)
+{
+ struct rte_mbuf m;
+ struct vhost_virtqueue *vq;
+ struct vring_desc *desc;
+ uint64_t buff_addr = 0;
+ uint32_t head[MAX_PKT_BURST];
+ uint32_t used_idx;
+ uint32_t i;
+ uint16_t free_entries, packet_success = 0;
+ uint16_t avail_idx;
+
+ vq = dev->virtqueue_tx;
+ avail_idx = *((volatile uint16_t *)&vq->avail->idx);
+
+ /* If there are no available buffers then return. */
+ if (vq->last_used_idx == avail_idx)
+ return;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_tx()\n", dev->device_fh);
+
+ /* Prefetch available ring to retrieve head indexes. */
+ rte_prefetch0(&vq->avail->ring[vq->last_used_idx & (vq->size - 1)]);
+
+ /*get the number of free entries in the ring*/
+ free_entries = avail_idx - vq->last_used_idx;
+ free_entries = unlikely(free_entries < MAX_PKT_BURST) ? free_entries : MAX_PKT_BURST;
+
+ LOG_DEBUG(VHOST_DATA, "(%"PRIu64") Buffers available %d\n", dev->device_fh, free_entries);
+ /* Retrieve all of the head indexes first to avoid caching issues. */
+ for (i = 0; i < free_entries; i++)
+ head[i] = vq->avail->ring[(vq->last_used_idx + i) & (vq->size - 1)];
+
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success]]);
+
+ while (packet_success < free_entries) {
+ desc = &vq->desc[head[packet_success]];
+ /* Prefetch descriptor address. */
+ rte_prefetch0(desc);
+
+ if (packet_success < (free_entries - 1)) {
+ /* Prefetch descriptor index. */
+ rte_prefetch0(&vq->desc[head[packet_success+1]]);
+ }
+
+ /* Update used index buffer information. */
+ used_idx = vq->last_used_idx & (vq->size - 1);
+ vq->used->ring[used_idx].id = head[packet_success];
+ vq->used->ring[used_idx].len = 0;
+
+ /* Discard first buffer as it is the virtio header */
+ desc = &vq->desc[desc->next];
+
+ /* Buffer address translation. */
+ buff_addr = gpa_to_vva(dev, desc->addr);
+ /* Prefetch buffer address. */
+ rte_prefetch0((void*)(uintptr_t)buff_addr);
+
+ /* Setup dummy mbuf. This is copied to a real mbuf if transmitted out the physical port. */
+ m.data_len = desc->len;
+ m.data_off = 0;
+ m.nb_segs = 1;
+
+ virtio_tx_route(dev, &m, mbuf_pool, 0);
+
+ vq->last_used_idx++;
+ packet_success++;
+ }
+
+ rte_compiler_barrier();
+ vq->used->idx += packet_success;
+ /* Kick guest if required. */
+}
+
+/*
+ * This function is called by each data core. It handles all RX/TX registered with the
+ * core. For TX the specific lcore linked list is used. For RX, MAC addresses are compared
+ * with all devices in the main linked list.
+ */
+static int
+switch_worker(void *arg)
+{
+ struct rte_mempool *mbuf_pool = arg;
+ struct virtio_net *dev = NULL;
+ struct rte_mbuf *pkts_burst[MAX_PKT_BURST];
+ struct virtio_net_data_ll *dev_ll;
+ struct mbuf_table *tx_q;
+ volatile struct lcore_ll_info *lcore_ll;
+ const uint64_t drain_tsc = (rte_get_tsc_hz() + US_PER_S - 1) / US_PER_S * BURST_TX_DRAIN_US;
+ uint64_t prev_tsc, diff_tsc, cur_tsc, ret_count = 0;
+ unsigned ret, i;
+ const uint16_t lcore_id = rte_lcore_id();
+ const uint16_t num_cores = (uint16_t)rte_lcore_count();
+ uint16_t rx_count = 0;
+
+ RTE_LOG(INFO, VHOST_DATA, "Procesing on Core %u started \n", lcore_id);
+ lcore_ll = lcore_info[lcore_id].lcore_ll;
+ prev_tsc = 0;
+
+ tx_q = &lcore_tx_queue[lcore_id];
+ for (i = 0; i < num_cores; i ++) {
+ if (lcore_ids[i] == lcore_id) {
+ tx_q->txq_id = i;
+ break;
+ }
+ }
+
+ while(1) {
+ cur_tsc = rte_rdtsc();
+ /*
+ * TX burst queue drain
+ */
+ diff_tsc = cur_tsc - prev_tsc;
+ if (unlikely(diff_tsc > drain_tsc)) {
+
+ if (tx_q->len) {
+ LOG_DEBUG(VHOST_DATA, "TX queue drained after timeout with burst size %u \n", tx_q->len);
+
+ /*Tx any packets in the queue*/
+ ret = rte_eth_tx_burst(ports[0], (uint16_t)tx_q->txq_id,
+ (struct rte_mbuf **)tx_q->m_table,
+ (uint16_t)tx_q->len);
+ if (unlikely(ret < tx_q->len)) {
+ do {
+ rte_pktmbuf_free(tx_q->m_table[ret]);
+ } while (++ret < tx_q->len);
+ }
+
+ tx_q->len = 0;
+ }
+
+ prev_tsc = cur_tsc;
+
+ }
+
+ /*
+ * Inform the configuration core that we have exited the linked list and that no devices are
+ * in use if requested.
+ */
+ if (lcore_ll->dev_removal_flag == REQUEST_DEV_REMOVAL)
+ lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+
+ /*
+ * Process devices
+ */
+ dev_ll = lcore_ll->ll_root_used;
+
+ while (dev_ll != NULL) {
+ /*get virtio device ID*/
+ dev = dev_ll->dev;
+
+ if (unlikely(dev->remove)) {
+ dev_ll = dev_ll->next;
+ unlink_vmdq(dev);
+ dev->ready = DEVICE_SAFE_REMOVE;
+ continue;
+ }
+ if (likely(dev->ready == DEVICE_READY)) {
+ /*Handle guest RX*/
+ rx_count = rte_eth_rx_burst(ports[0],
+ (uint16_t)dev->vmdq_rx_q, pkts_burst, MAX_PKT_BURST);
+
+ if (rx_count) {
+ ret_count = virtio_dev_rx(dev, pkts_burst, rx_count);
+ if (enable_stats) {
+ rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx_total, rx_count);
+ rte_atomic64_add(&dev_statistics[dev_ll->dev->device_fh].rx, ret_count);
+ }
+ while (likely(rx_count)) {
+ rx_count--;
+ rte_pktmbuf_free_seg(pkts_burst[rx_count]);
+ }
+
+ }
+ }
+
+ if (likely(!dev->remove))
+ /*Handle guest TX*/
+ virtio_dev_tx(dev, mbuf_pool);
+
+ /*move to the next device in the list*/
+ dev_ll = dev_ll->next;
+ }
+ }
+
+ return 0;
+}
+
+/*
+ * Add an entry to a used linked list. A free entry must first be found in the free linked list
+ * using get_data_ll_free_entry();
+ */
+static void
+add_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ /* Set next as NULL and use a compiler barrier to avoid reordering. */
+ ll_dev->next = NULL;
+ rte_compiler_barrier();
+
+ /* If ll == NULL then this is the first device. */
+ if (ll) {
+ /* Increment to the tail of the linked list. */
+ while ((ll->next != NULL) )
+ ll = ll->next;
+
+ ll->next = ll_dev;
+ } else {
+ *ll_root_addr = ll_dev;
+ }
+}
+
+/*
+ * Remove an entry from a used linked list. The entry must then be added to the free linked list
+ * using put_data_ll_free_entry().
+ */
+static void
+rm_data_ll_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev, struct virtio_net_data_ll *ll_dev_last)
+{
+ struct virtio_net_data_ll *ll = *ll_root_addr;
+
+ if (ll_dev == ll)
+ *ll_root_addr = ll_dev->next;
+ else
+ ll_dev_last->next = ll_dev->next;
+}
+
+/*
+ * Find and return an entry from the free linked list.
+ */
+static struct virtio_net_data_ll *
+get_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+ struct virtio_net_data_ll *ll_dev;
+
+ if (ll_free == NULL)
+ return NULL;
+
+ ll_dev = ll_free;
+ *ll_root_addr = ll_free->next;
+
+ return ll_dev;
+}
+
+/*
+ * Place an entry back on to the free linked list.
+ */
+static void
+put_data_ll_free_entry(struct virtio_net_data_ll **ll_root_addr, struct virtio_net_data_ll *ll_dev)
+{
+ struct virtio_net_data_ll *ll_free = *ll_root_addr;
+
+ ll_dev->next = ll_free;
+ *ll_root_addr = ll_dev;
+}
+
+/*
+ * Creates a linked list of a given size.
+ */
+static struct virtio_net_data_ll *
+alloc_data_ll(uint32_t size)
+{
+ struct virtio_net_data_ll *ll_new;
+ uint32_t i;
+
+ /* Malloc and then chain the linked list. */
+ ll_new = malloc(size * sizeof(struct virtio_net_data_ll));
+ if (ll_new == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for ll_new.\n");
+ return NULL;
+ }
+
+ for (i = 0; i < size - 1; i++) {
+ ll_new[i].dev = NULL;
+ ll_new[i].next = &ll_new[i+1];
+ }
+ ll_new[i].next = NULL;
+
+ return ll_new;
+}
+
+/*
+ * Create the main linked list along with each individual core's linked list. A used and a free list
+ * are created to manage entries.
+ */
+static int
+init_data_ll (void)
+{
+ int lcore;
+
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll = malloc(sizeof(struct lcore_ll_info));
+ if (lcore_info[lcore].lcore_ll == NULL) {
+ RTE_LOG(ERR, VHOST_CONFIG, "Failed to allocate memory for lcore_ll.\n");
+ return -1;
+ }
+
+ lcore_info[lcore].lcore_ll->device_num = 0;
+ lcore_info[lcore].lcore_ll->dev_removal_flag = ACK_DEV_REMOVAL;
+ lcore_info[lcore].lcore_ll->ll_root_used = NULL;
+ if (num_devices % num_switching_cores)
+ lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll((num_devices / num_switching_cores) + 1);
+ else
+ lcore_info[lcore].lcore_ll->ll_root_free = alloc_data_ll(num_devices / num_switching_cores);
+ }
+
+ /* Allocate devices up to a maximum of MAX_DEVICES. */
+ ll_root_free = alloc_data_ll(MIN((num_devices), MAX_DEVICES));
+
+ return 0;
+}
+/*
+ * Remove a device from the specific data core linked list and from the main linked list. The
+ * RX/TX thread must set the flag to indicate that it is safe to remove the device.
+ */
+static void
+destroy_device (volatile struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_lcore_dev_cur;
+ struct virtio_net_data_ll *ll_main_dev_cur;
+ struct virtio_net_data_ll *ll_lcore_dev_last = NULL;
+ struct virtio_net_data_ll *ll_main_dev_last = NULL;
+ int lcore;
+
+ dev->flags &= ~VIRTIO_DEV_RUNNING;
+
+ /*set the remove flag. */
+ dev->remove = 1;
+
+ while(dev->ready != DEVICE_SAFE_REMOVE) {
+ rte_pause();
+ }
+
+ /* Search for entry to be removed from lcore ll */
+ ll_lcore_dev_cur = lcore_info[dev->coreid].lcore_ll->ll_root_used;
+ while (ll_lcore_dev_cur != NULL) {
+ if (ll_lcore_dev_cur->dev == dev) {
+ break;
+ } else {
+ ll_lcore_dev_last = ll_lcore_dev_cur;
+ ll_lcore_dev_cur = ll_lcore_dev_cur->next;
+ }
+ }
+
+ /* Search for entry to be removed from main ll */
+ ll_main_dev_cur = ll_root_used;
+ ll_main_dev_last = NULL;
+ while (ll_main_dev_cur != NULL) {
+ if (ll_main_dev_cur->dev == dev) {
+ break;
+ } else {
+ ll_main_dev_last = ll_main_dev_cur;
+ ll_main_dev_cur = ll_main_dev_cur->next;
+ }
+ }
+
+ if (ll_lcore_dev_cur == NULL || ll_main_dev_cur == NULL) {
+ RTE_LOG(ERR, XENHOST, "%s: could find device in per_cpu list or main_list\n", __func__);
+ return;
+ }
+
+ /* Remove entries from the lcore and main ll. */
+ rm_data_ll_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_used, ll_lcore_dev_cur, ll_lcore_dev_last);
+ rm_data_ll_entry(&ll_root_used, ll_main_dev_cur, ll_main_dev_last);
+
+ /* Set the dev_removal_flag on each lcore. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ lcore_info[lcore].lcore_ll->dev_removal_flag = REQUEST_DEV_REMOVAL;
+ }
+
+ /*
+ * Once each core has set the dev_removal_flag to ACK_DEV_REMOVAL we can be sure that
+ * they can no longer access the device removed from the linked lists and that the devices
+ * are no longer in use.
+ */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ while (lcore_info[lcore].lcore_ll->dev_removal_flag != ACK_DEV_REMOVAL) {
+ rte_pause();
+ }
+ }
+
+ /* Add the entries back to the lcore and main free ll.*/
+ put_data_ll_free_entry(&lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->ll_root_free, ll_lcore_dev_cur);
+ put_data_ll_free_entry(&ll_root_free, ll_main_dev_cur);
+
+ /* Decrement number of device on the lcore. */
+ lcore_info[ll_lcore_dev_cur->dev->coreid].lcore_ll->device_num--;
+
+ RTE_LOG(INFO, VHOST_DATA, " #####(%"PRIu64") Device has been removed from data core\n", dev->device_fh);
+}
+
+/*
+ * A new device is added to a data core. First the device is added to the main linked list
+ * and then allocated to a specific data core.
+ */
+static int
+new_device (struct virtio_net *dev)
+{
+ struct virtio_net_data_ll *ll_dev;
+ int lcore, core_add = 0;
+ uint32_t device_num_min = num_devices;
+
+ /* Add device to main ll */
+ ll_dev = get_data_ll_free_entry(&ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") No free entry found in linked list. Device limit "
+ "of %d devices per core has been reached\n",
+ dev->device_fh, num_devices);
+ return -1;
+ }
+ ll_dev->dev = dev;
+ add_data_ll_entry(&ll_root_used, ll_dev);
+
+ /*reset ready flag*/
+ dev->ready = DEVICE_NOT_READY;
+ dev->remove = 0;
+
+ /* Find a suitable lcore to add the device. */
+ RTE_LCORE_FOREACH_SLAVE(lcore) {
+ if (lcore_info[lcore].lcore_ll->device_num < device_num_min) {
+ device_num_min = lcore_info[lcore].lcore_ll->device_num;
+ core_add = lcore;
+ }
+ }
+ /* Add device to lcore ll */
+ ll_dev->dev->coreid = core_add;
+ ll_dev = get_data_ll_free_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_free);
+ if (ll_dev == NULL) {
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Failed to add device to data core\n", dev->device_fh);
+ destroy_device(dev);
+ return -1;
+ }
+ ll_dev->dev = dev;
+ add_data_ll_entry(&lcore_info[ll_dev->dev->coreid].lcore_ll->ll_root_used, ll_dev);
+
+ /* Initialize device stats */
+ memset(&dev_statistics[dev->device_fh], 0, sizeof(struct device_statistics));
+
+ lcore_info[ll_dev->dev->coreid].lcore_ll->device_num++;
+ dev->flags |= VIRTIO_DEV_RUNNING;
+
+ RTE_LOG(INFO, VHOST_DATA, "(%"PRIu64") Device has been added to data core %d\n", dev->device_fh, dev->coreid);
+
+ link_vmdq(dev);
+
+ return 0;
+}
+
+/*
+ * These callbacks allow devices to be added to the data core when configuration
+ * has fully completed.
+ */
+static const struct virtio_net_device_ops virtio_net_device_ops =
+{
+ .new_device = new_device,
+ .destroy_device = destroy_device,
+};
+
+/*
+ * This thread wakes up periodically to print stats if the user has
+ * enabled them.
+ */
+static void
+print_stats(void)
+{
+ struct virtio_net_data_ll *dev_ll;
+ uint64_t tx_dropped, rx_dropped;
+ uint64_t tx, tx_total, rx, rx_total;
+ uint32_t device_fh;
+ const char clr[] = { 27, '[', '2', 'J', '\0' };
+ const char top_left[] = { 27, '[', '1', ';', '1', 'H','\0' };
+
+ while(1) {
+ sleep(enable_stats);
+
+ /* Clear screen and move to top left */
+ printf("%s%s", clr, top_left);
+
+ printf("\nDevice statistics ====================================");
+
+ dev_ll = ll_root_used;
+ while (dev_ll != NULL) {
+ device_fh = (uint32_t)dev_ll->dev->device_fh;
+ tx_total = dev_statistics[device_fh].tx_total;
+ tx = dev_statistics[device_fh].tx;
+ tx_dropped = tx_total - tx;
+ rx_total = rte_atomic64_read(&dev_statistics[device_fh].rx_total);
+ rx = rte_atomic64_read(&dev_statistics[device_fh].rx);
+ rx_dropped = rx_total - rx;
+
+ printf("\nStatistics for device %"PRIu32" ------------------------------"
+ "\nTX total: %"PRIu64""
+ "\nTX dropped: %"PRIu64""
+ "\nTX successful: %"PRIu64""
+ "\nRX total: %"PRIu64""
+ "\nRX dropped: %"PRIu64""
+ "\nRX successful: %"PRIu64"",
+ device_fh,
+ tx_total,
+ tx_dropped,
+ tx,
+ rx_total,
+ rx_dropped,
+ rx);
+
+ dev_ll = dev_ll->next;
+ }
+ printf("\n======================================================\n");
+ }
+}
+
+
+int init_virtio_net(struct virtio_net_device_ops const * const ops);
+
+/*
+ * Main function, does initialisation and calls the per-lcore functions. The Xen
+ * virtio monitor is also started here to track guest virtio devices via xenstore.
+ */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ unsigned lcore_id, core_id = 0;
+ unsigned nb_ports, valid_num_ports;
+ int ret;
+ uint8_t portid;
+ static pthread_t tid;
+ char thread_name[RTE_MAX_THREAD_NAME_LEN];
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse app arguments */
+ ret = us_vhost_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid argument\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++)
+ if (rte_lcore_is_enabled(lcore_id))
+ lcore_ids[core_id ++] = lcore_id;
+
+ if (rte_lcore_count() > RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE,"Not enough cores\n");
+
+ /* Set the number of switching cores available. */
+ num_switching_cores = rte_lcore_count()-1;
+
+ /* Get the number of physical ports. */
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+ * Update the global variable num_ports and the global ports array, and get
+ * the number of valid ports according to the number of ports in the system.
+ */
+ valid_num_ports = check_ports_num(nb_ports);
+
+ if ((valid_num_ports == 0) || (valid_num_ports > MAX_SUP_PORTS)) {
+ RTE_LOG(INFO, VHOST_PORT, "Current enabled port number is %u,"
+ "but only %u port can be enabled\n",num_ports, MAX_SUP_PORTS);
+ return -1;
+ }
+
+ /* Create the mbuf pool. */
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS_PER_PORT * valid_num_ports, MBUF_CACHE_SIZE, 0,
+ RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* Set log level. */
+ rte_set_log_level(LOG_LEVEL);
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ RTE_LOG(INFO, VHOST_PORT, "Skipping disabled port %d\n", portid);
+ continue;
+ }
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
+ }
+
+ /* Initialise all linked lists. */
+ if (init_data_ll() == -1)
+ rte_exit(EXIT_FAILURE, "Failed to initialize linked list\n");
+
+ /* Initialize device stats */
+ memset(&dev_statistics, 0, sizeof(dev_statistics));
+
+ /* Enable stats if the user option is set. */
+ if (enable_stats) {
+ ret = pthread_create(&tid, NULL, (void *)print_stats, NULL);
+ if (ret != 0)
+ rte_exit(EXIT_FAILURE,
+ "Cannot create print-stats thread\n");
+
+ /* Set thread_name for aid in debugging. */
+ snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "print-xen-stats");
+ ret = rte_thread_setname(tid, thread_name);
+ if (ret != 0)
+ RTE_LOG(ERR, VHOST_CONFIG,
+ "Cannot set print-stats name\n");
+ }
+
+ /* Launch all data cores. */
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(switch_worker, mbuf_pool, lcore_id);
+ }
+
+ init_virtio_xen(&virtio_net_device_ops);
+
+ virtio_monitor_loop();
+ return 0;
+}
diff --git a/examples/vhost_xen/main.h b/examples/vhost_xen/main.h
new file mode 100644
index 00000000..481572e6
--- /dev/null
+++ b/examples/vhost_xen/main.h
@@ -0,0 +1,77 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _MAIN_H_
+#define _MAIN_H_
+
+//#define DEBUG
+
+#ifdef DEBUG
+#define LOG_LEVEL RTE_LOG_DEBUG
+#define LOG_DEBUG(log_type, fmt, args...) \
+ RTE_LOG(DEBUG, log_type, fmt, ##args)
+#else
+#define LOG_LEVEL RTE_LOG_INFO
+#define LOG_DEBUG(log_type, fmt, args...) do{} while(0)
+#endif
+
+/* Macros for printing using RTE_LOG */
+#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1
+#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER2
+#define RTE_LOGTYPE_VHOST_PORT RTE_LOGTYPE_USER3
+
+/*
+ * Device linked list structure for data path.
+ */
+struct virtio_net_data_ll
+{
+ struct virtio_net *dev; /* Pointer to device created by configuration core. */
+ struct virtio_net_data_ll *next; /* Pointer to next device in linked list. */
+};
+
+/*
+ * Structure containing data core specific information.
+ */
+struct lcore_ll_info
+{
+ struct virtio_net_data_ll *ll_root_free; /* Pointer to head in free linked list. */
+ struct virtio_net_data_ll *ll_root_used; /* Pointer to head of used linked list. */
+ uint32_t device_num; /* Number of devices on lcore. */
+ volatile uint8_t dev_removal_flag; /* Flag to synchronize device removal. */
+};
+
+struct lcore_info
+{
+ struct lcore_ll_info *lcore_ll; /* Pointer to data core specific lcore_ll_info struct */
+};
+#endif /* _MAIN_H_ */
diff --git a/examples/vhost_xen/vhost_monitor.c b/examples/vhost_xen/vhost_monitor.c
new file mode 100644
index 00000000..fb9606bf
--- /dev/null
+++ b/examples/vhost_xen/vhost_monitor.c
@@ -0,0 +1,595 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/eventfd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <xen/xen-compat.h>
+#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
+#include <xs.h>
+#else
+#include <xenstore.h>
+#endif
+#include <linux/virtio_ring.h>
+#include <linux/virtio_pci.h>
+#include <linux/virtio_net.h>
+
+#include <rte_ethdev.h>
+#include <rte_log.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+
+#include "virtio-net.h"
+#include "xen_vhost.h"
+
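+/* Xenstore connection and the file descriptor polled for watch events. */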
+struct virtio_watch {
+ struct xs_handle *xs;
+ int watch_fd;
+};
+
+
+/* device ops to add/remove device to/from data core. */
+static struct virtio_net_device_ops const *notify_ops;
+
+/* root address of the linked list in the configuration core. */
+static struct virtio_net_config_ll *ll_root = NULL;
+
+/* root address of VM. */
+static struct xen_guestlist guest_root;
+
+static struct virtio_watch watch;
+
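+/*
+ * Lay the descriptor, available and used rings out over one contiguous memory
+ * block, mirroring the vring layout defined by the virtio specification.
+ */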
+static void
+vq_vring_init(struct vhost_virtqueue *vq, unsigned int num, uint8_t *p,
+ unsigned long align)
+{
+ vq->size = num;
+ vq->desc = (struct vring_desc *) p;
+ vq->avail = (struct vring_avail *) (p +
+ num * sizeof(struct vring_desc));
+ vq->used = (void *)
+ RTE_ALIGN_CEIL( (uintptr_t)(&vq->avail->ring[num]), align);
+
+}
+
+static int
+init_watch(void)
+{
+ struct xs_handle *xs;
+ int ret;
+ int fd;
+
+ /* get a connection to the daemon */
+ xs = xs_daemon_open();
+ if (xs == NULL) {
+ RTE_LOG(ERR, XENHOST, "xs_daemon_open failed\n");
+ return -1;
+ }
+
+ ret = xs_watch(xs, "/local/domain", "mytoken");
+ if (ret == 0) {
+ RTE_LOG(ERR, XENHOST, "%s: xs_watch failed\n", __func__);
+ xs_daemon_close(xs);
+ return -1;
+ }
+
+ /* We are notified of read availability on the watch via the file descriptor. */
+ fd = xs_fileno(xs);
+ watch.xs = xs;
+ watch.watch_fd = fd;
+
+ TAILQ_INIT(&guest_root);
+ return 0;
+}
+
+static struct xen_guest *
+get_xen_guest(int dom_id)
+{
+ struct xen_guest *guest = NULL;
+
+ TAILQ_FOREACH(guest, &guest_root, next) {
+ if(guest->dom_id == dom_id)
+ return guest;
+ }
+
+ return NULL;
+}
+
+
+static struct xen_guest *
+add_xen_guest(int32_t dom_id)
+{
+ struct xen_guest *guest = NULL;
+
+ if ((guest = get_xen_guest(dom_id)) != NULL)
+ return guest;
+
+ guest = calloc(1, sizeof(struct xen_guest));
+ if (guest) {
+ RTE_LOG(ERR, XENHOST, " %s: return newly created guest with %d rings\n", __func__, guest->vring_num);
+ TAILQ_INSERT_TAIL(&guest_root, guest, next);
+ guest->dom_id = dom_id;
+ }
+
+ return guest;
+}
+
+static void
+cleanup_device(struct virtio_net_config_ll *ll_dev)
+{
+ if (ll_dev == NULL)
+ return;
+ if (ll_dev->dev.virtqueue_rx) {
+ rte_free(ll_dev->dev.virtqueue_rx);
+ ll_dev->dev.virtqueue_rx = NULL;
+ }
+ if (ll_dev->dev.virtqueue_tx) {
+ rte_free(ll_dev->dev.virtqueue_tx);
+ ll_dev->dev.virtqueue_tx = NULL;
+ }
+ free(ll_dev);
+}
+
+/*
+ * Add entry containing a device to the device configuration linked list.
+ */
+static void
+add_config_ll_entry(struct virtio_net_config_ll *new_ll_dev)
+{
+ struct virtio_net_config_ll *ll_dev = ll_root;
+
+ /* If ll_dev is NULL then this is the first device; it is handled in the else branch. */
+ if (ll_dev) {
+ /* If the 1st device_id != 0 then we insert our device here. */
+ if (ll_dev->dev.device_fh != 0) {
+ new_ll_dev->dev.device_fh = 0;
+ new_ll_dev->next = ll_dev;
+ ll_root = new_ll_dev;
+ } else {
+ /* increment through the ll until we find an unused device_id,
+ * insert the device at that entry
+ */
+ while ((ll_dev->next != NULL) && (ll_dev->dev.device_fh == (ll_dev->next->dev.device_fh - 1)))
+ ll_dev = ll_dev->next;
+
+ new_ll_dev->dev.device_fh = ll_dev->dev.device_fh + 1;
+ new_ll_dev->next = ll_dev->next;
+ ll_dev->next = new_ll_dev;
+ }
+ } else {
+ ll_root = new_ll_dev;
+ ll_root->dev.device_fh = 0;
+ }
+}
+
+
+/*
+ * Remove an entry from the device configuration linked list.
+ */
+static struct virtio_net_config_ll *
+rm_config_ll_entry(struct virtio_net_config_ll *ll_dev, struct virtio_net_config_ll *ll_dev_last)
+{
+ /* First remove the device and then clean it up. */
+ if (ll_dev == ll_root) {
+ ll_root = ll_dev->next;
+ cleanup_device(ll_dev);
+ return ll_root;
+ } else {
+ ll_dev_last->next = ll_dev->next;
+ cleanup_device(ll_dev);
+ return ll_dev_last->next;
+ }
+}
+
+/*
+ * Retrieves an entry from the device configuration linked list.
+ */
+static struct virtio_net_config_ll *
+get_config_ll_entry(unsigned int virtio_idx, unsigned int dom_id)
+{
+ struct virtio_net_config_ll *ll_dev = ll_root;
+
+ /* Loop through linked list until the dom_id is found. */
+ while (ll_dev != NULL) {
+ if (ll_dev->dev.dom_id == dom_id && ll_dev->dev.virtio_idx == virtio_idx)
+ return ll_dev;
+ ll_dev = ll_dev->next;
+ }
+
+ return NULL;
+}
+
+/*
+ * Initialise all variables in device structure.
+ */
+static void
+init_dev(struct virtio_net *dev)
+{
+ RTE_SET_USED(dev);
+}
+
+
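+/*
+ * Allocates and initialises a device entry for the given guest and virtio
+ * index: the RX/TX virtqueues are mapped onto the guest's shared vring memory
+ * and the guest mempool regions are recorded for GPA to VVA translation.
+ */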
+static struct
+virtio_net_config_ll *new_device(unsigned int virtio_idx, struct xen_guest *guest)
+{
+ struct virtio_net_config_ll *new_ll_dev;
+ struct vhost_virtqueue *virtqueue_rx, *virtqueue_tx;
+ size_t size, vq_ring_size, vq_size = VQ_DESC_NUM;
+ void *vq_ring_virt_mem;
+ uint64_t gpa;
+ uint32_t i;
+
+ /* Setup device and virtqueues. */
+ new_ll_dev = calloc(1, sizeof(struct virtio_net_config_ll));
+ virtqueue_rx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
+ virtqueue_tx = rte_zmalloc(NULL, sizeof(struct vhost_virtqueue), RTE_CACHE_LINE_SIZE);
+ if (new_ll_dev == NULL || virtqueue_rx == NULL || virtqueue_tx == NULL)
+ goto err;
+
+ new_ll_dev->dev.virtqueue_rx = virtqueue_rx;
+ new_ll_dev->dev.virtqueue_tx = virtqueue_tx;
+ new_ll_dev->dev.dom_id = guest->dom_id;
+ new_ll_dev->dev.virtio_idx = virtio_idx;
+ /* Initialise device and virtqueues. */
+ init_dev(&new_ll_dev->dev);
+
+ size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN);
+ vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN);
+ (void)vq_ring_size;
+
+ vq_ring_virt_mem = guest->vring[virtio_idx].rxvring_addr;
+ vq_vring_init(virtqueue_rx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
+ virtqueue_rx->size = vq_size;
+ virtqueue_rx->vhost_hlen = sizeof(struct virtio_net_hdr);
+
+ vq_ring_virt_mem = guest->vring[virtio_idx].txvring_addr;
+ vq_vring_init(virtqueue_tx, vq_size, vq_ring_virt_mem, VIRTIO_PCI_VRING_ALIGN);
+ virtqueue_tx->size = vq_size;
+ memcpy(&new_ll_dev->dev.mac_address, &guest->vring[virtio_idx].addr, sizeof(struct ether_addr));
+
+ /* There must be one virtio_memory structure per domain id. */
+ new_ll_dev->dev.mem = malloc(sizeof(struct virtio_memory) + sizeof(struct virtio_memory_regions) * MAX_XENVIRT_MEMPOOL);
+ new_ll_dev->dev.mem->nregions = guest->pool_num;
+ for (i = 0; i < guest->pool_num; i++) {
+ gpa = new_ll_dev->dev.mem->regions[i].guest_phys_address =
+ (uint64_t)((uintptr_t)guest->mempool[i].gva);
+ new_ll_dev->dev.mem->regions[i].guest_phys_address_end =
+ gpa + guest->mempool[i].mempfn_num * getpagesize();
+ new_ll_dev->dev.mem->regions[i].address_offset =
+ (uint64_t)((uintptr_t)guest->mempool[i].hva -
+ (uintptr_t)gpa);
+ }
+
+ new_ll_dev->next = NULL;
+
+ /* Add entry to device configuration linked list. */
+ add_config_ll_entry(new_ll_dev);
+ return new_ll_dev;
+err:
+ free(new_ll_dev);
+ rte_free(virtqueue_rx);
+ rte_free(virtqueue_tx);
+
+ return NULL;
+}
+
+static void
+destroy_guest(struct xen_guest *guest)
+{
+ uint32_t i;
+
+ for (i = 0; i < guest->vring_num; i++)
+ cleanup_vring(&guest->vring[i]);
+ /* clean mempool */
+ for (i = 0; i < guest->pool_num; i++)
+ cleanup_mempool(&guest->mempool[i]);
+ free(guest);
+
+ return;
+}
+
+/*
+ * This function cleans up the device and removes it from the device configuration linked list.
+ */
+static void
+destroy_device(unsigned int virtio_idx, unsigned int dom_id)
+{
+ struct virtio_net_config_ll *ll_dev_cur_ctx, *ll_dev_last = NULL;
+ struct virtio_net_config_ll *ll_dev_cur = ll_root;
+
+ /* clean virtio device */
+ struct xen_guest *guest = NULL;
+ guest = get_xen_guest(dom_id);
+ if (guest == NULL)
+ return;
+
+ /* Find the linked list entry for the device to be removed. */
+ ll_dev_cur_ctx = get_config_ll_entry(virtio_idx, dom_id);
+ while (ll_dev_cur != NULL) {
+ /* Remove the entry once the matching device is found. */
+ if (ll_dev_cur == ll_dev_cur_ctx) {
+ if ((ll_dev_cur->dev.flags & VIRTIO_DEV_RUNNING))
+ notify_ops->destroy_device(&(ll_dev_cur->dev));
+ ll_dev_cur = rm_config_ll_entry(ll_dev_cur, ll_dev_last);
+ } else {
+ ll_dev_last = ll_dev_cur;
+ ll_dev_cur = ll_dev_cur->next;
+ }
+ }
+ RTE_LOG(INFO, XENHOST, " %s guest:%p vring:%p rxvring:%p txvring:%p flag:%p\n",
+ __func__, guest, &guest->vring[virtio_idx], guest->vring[virtio_idx].rxvring_addr, guest->vring[virtio_idx].txvring_addr, guest->vring[virtio_idx].flag);
+ cleanup_vring(&guest->vring[virtio_idx]);
+ guest->vring[virtio_idx].removed = 1;
+ guest->vring_num -= 1;
+}
+
+
+
+
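+/*
+ * Scan every guest for vrings whose shared flag has been cleared, destroy the
+ * corresponding virtio devices, and free any guest left with no active vrings.
+ */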
+static void
+watch_unmap_event(void)
+{
+ int i;
+ struct xen_guest *guest = NULL;
+ bool remove_request;
+
+ TAILQ_FOREACH(guest, &guest_root, next) {
+ for (i = 0; i < MAX_VIRTIO; i++) {
+ if (guest->vring[i].dom_id && guest->vring[i].removed == 0 && *guest->vring[i].flag == 0) {
+ RTE_LOG(INFO, XENHOST, "\n\n");
+ RTE_LOG(INFO, XENHOST, " #####%s: (%d, %d) to be removed\n",
+ __func__,
+ guest->vring[i].dom_id,
+ i);
+ destroy_device(i, guest->dom_id);
+ RTE_LOG(INFO, XENHOST, " %s: DOM %u, vring num: %d\n",
+ __func__,
+ guest->dom_id,
+ guest->vring_num);
+ }
+ }
+ }
+
+_find_next_remove:
+ guest = NULL;
+ remove_request = false;
+ TAILQ_FOREACH(guest, &guest_root, next) {
+ if (guest->vring_num == 0) {
+ remove_request = true;
+ break;
+ }
+ }
+ if (remove_request == true) {
+ TAILQ_REMOVE(&guest_root, guest, next);
+ RTE_LOG(INFO, XENHOST, " #####%s: destroy guest (%d)\n", __func__, guest->dom_id);
+ destroy_guest(guest);
+ goto _find_next_remove;
+ }
+ return;
+}
+
+/*
+ * If the guest starts first, that is fine; if the host starts first, that is
+ * also fine. However, if the guest has been running for some time and the host
+ * stops and restarts, last_used_idx resets to 0; this case is not handled yet. */
+
+static void virtio_init(void)
+{
+ uint32_t len, e_num;
+ uint32_t i,j;
+ char **dom;
+ char *status;
+ int dom_id;
+ char path[PATH_MAX];
+ char node[PATH_MAX];
+ xs_transaction_t th;
+ struct xen_guest *guest;
+ struct virtio_net_config_ll *net_config;
+ char *end;
+ int val;
+
+ /* init env for watch the node */
+ if (init_watch() < 0)
+ return;
+
+ dom = xs_directory(watch.xs, XBT_NULL, "/local/domain", &e_num);
+
+ for (i = 0; i < e_num; i++) {
+ errno = 0;
+ dom_id = strtol(dom[i], &end, 0);
+ if (errno != 0 || end == NULL || dom_id == 0)
+ continue;
+
+ for (j = 0; j < RTE_MAX_ETHPORTS; j++) {
+ snprintf(node, PATH_MAX, "%s%d", VIRTIO_START, j);
+ snprintf(path, PATH_MAX, XEN_VM_NODE_FMT,
+ dom_id, node);
+
+ th = xs_transaction_start(watch.xs);
+ status = xs_read(watch.xs, th, path, &len);
+ xs_transaction_end(watch.xs, th, false);
+
+ if (status == NULL)
+ break;
+
+ /* if there's any valid virtio device */
+ errno = 0;
+ val = strtol(status, &end, 0);
+ if (errno != 0 || end == NULL || end == status)
+ val = 0;
+ if (val == 1) {
+ guest = add_xen_guest(dom_id);
+ if (guest == NULL)
+ continue;
+ RTE_LOG(INFO, XENHOST, " there's a new virtio existed, new a virtio device\n\n");
+
+ RTE_LOG(INFO, XENHOST, " parse_vringnode dom_id %d virtioidx %d\n",dom_id,j);
+ if (parse_vringnode(guest, j)) {
+ RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
+ TAILQ_REMOVE(&guest_root, guest, next);
+ destroy_guest(guest);
+
+ continue;
+ }
+
+ /*if pool_num > 0, then mempool has already been parsed*/
+ if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
+ RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n");
+ TAILQ_REMOVE(&guest_root, guest, next);
+ destroy_guest(guest);
+ continue;
+ }
+
+ net_config = new_device(j, guest);
+ /* Everything is ready now; add the device to a data core. */
+ notify_ops->new_device(&net_config->dev);
+ }
+ }
+ }
+
+ free(dom);
+ return;
+}
+
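+/*
+ * Poll xenstore for watch events: when a virtio front-end node appears under
+ * /local/domain, parse its vring and mempool information and hand the new
+ * device over to the data cores through notify_ops.
+ */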
+void
+virtio_monitor_loop(void)
+{
+ char **vec;
+ xs_transaction_t th;
+ char *buf;
+ unsigned int len;
+ unsigned int dom_id;
+ uint32_t virtio_idx;
+ struct xen_guest *guest;
+ struct virtio_net_config_ll *net_config;
+ enum fieldnames {
+ FLD_NULL = 0,
+ FLD_LOCAL,
+ FLD_DOMAIN,
+ FLD_ID,
+ FLD_CONTROL,
+ FLD_DPDK,
+ FLD_NODE,
+ _NUM_FLD
+ };
+ char *str_fld[_NUM_FLD];
+ char *str;
+ char *end;
+
+ virtio_init();
+ while (1) {
+ watch_unmap_event();
+
+ usleep(50);
+ vec = xs_check_watch(watch.xs);
+
+ if (vec == NULL)
+ continue;
+
+ th = xs_transaction_start(watch.xs);
+
+ buf = xs_read(watch.xs, th, vec[XS_WATCH_PATH],&len);
+ xs_transaction_end(watch.xs, th, false);
+
+ if (buf) {
+ /* A vhost node exists in xenstore. */
+ if (rte_strsplit(vec[XS_WATCH_PATH], strnlen(vec[XS_WATCH_PATH], PATH_MAX),
+ str_fld, _NUM_FLD, '/') == _NUM_FLD) {
+ if (strstr(str_fld[FLD_NODE], VIRTIO_START)) {
+ errno = 0;
+ str = str_fld[FLD_ID];
+ dom_id = strtoul(str, &end, 0);
+ if (errno != 0 || end == NULL || end == str ) {
+ RTE_LOG(INFO, XENHOST, "invalid domain id\n");
+ continue;
+ }
+
+ errno = 0;
+ str = str_fld[FLD_NODE] + sizeof(VIRTIO_START) - 1;
+ virtio_idx = strtoul(str, &end, 0);
+ if (errno != 0 || end == NULL || end == str
+ || virtio_idx > MAX_VIRTIO) {
+ RTE_LOG(INFO, XENHOST, "invalid virtio idx\n");
+ continue;
+ }
+ RTE_LOG(INFO, XENHOST, " #####virtio dev (%d, %d) is started\n", dom_id, virtio_idx);
+
+ guest = add_xen_guest(dom_id);
+ if (guest == NULL)
+ continue;
+ guest->dom_id = dom_id;
+ if (parse_vringnode(guest, virtio_idx)) {
+ RTE_LOG(ERR, XENHOST, " there is invalid information in xenstore\n");
+ /*guest newly created? guest existed ?*/
+ TAILQ_REMOVE(&guest_root, guest, next);
+ destroy_guest(guest);
+ continue;
+ }
+ /*if pool_num > 0, then mempool has already been parsed*/
+ if (guest->pool_num == 0 && parse_mempoolnode(guest)) {
+ RTE_LOG(ERR, XENHOST, " there is error information in xenstore\n");
+ TAILQ_REMOVE(&guest_root, guest, next);
+ destroy_guest(guest);
+ continue;
+ }
+
+
+ net_config = new_device(virtio_idx, guest);
+ RTE_LOG(INFO, XENHOST, " Add to dataplane core\n");
+ notify_ops->new_device(&net_config->dev);
+
+ }
+ }
+ }
+
+ free(vec);
+ }
+ return;
+}
+
+/*
+ * Register ops so that we can add/remove device to data core.
+ */
+int
+init_virtio_xen(struct virtio_net_device_ops const *const ops)
+{
+ notify_ops = ops;
+ if (xenhost_init())
+ return -1;
+ return 0;
+}
diff --git a/examples/vhost_xen/virtio-net.h b/examples/vhost_xen/virtio-net.h
new file mode 100644
index 00000000..ab697260
--- /dev/null
+++ b/examples/vhost_xen/virtio-net.h
@@ -0,0 +1,113 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _VIRTIO_NET_H_
+#define _VIRTIO_NET_H_
+
+#include <stdint.h>
+
+#define VQ_DESC_NUM 256
+/* Used to indicate that the device is running on a data core */
+#define VIRTIO_DEV_RUNNING 1
+
+/*
+ * Structure contains variables relevant to TX/RX virtqueues.
+ */
+struct vhost_virtqueue
+{
+ struct vring_desc *desc; /* Virtqueue descriptor ring. */
+ struct vring_avail *avail; /* Virtqueue available ring. */
+ struct vring_used *used; /* Virtqueue used ring. */
+ uint32_t size; /* Size of descriptor ring. */
+ uint32_t vhost_hlen; /* Vhost header length (varies depending on RX mergeable buffers). */
+ volatile uint16_t last_used_idx; /* Last index used on the available ring */
+ volatile uint16_t last_used_idx_res; /* Used for multiple devices reserving buffers. */
+} __rte_cache_aligned;
+
+/*
+ * Device structure contains all configuration information relating to the device.
+ */
+struct virtio_net
+{
+ struct vhost_virtqueue *virtqueue_tx; /* Contains all TX virtqueue information. */
+ struct vhost_virtqueue *virtqueue_rx; /* Contains all RX virtqueue information. */
+ struct virtio_memory *mem; /* QEMU memory and memory region information. */
+ struct ether_addr mac_address; /* Device MAC address (Obtained on first TX packet). */
+ uint32_t flags; /* Device flags. Only used to check if device is running on data core. */
+ uint32_t vlan_tag; /* Vlan tag for device. Currently set to device_id (0-63). */
+ uint32_t vmdq_rx_q;
+ uint64_t device_fh; /* device identifier. */
+ uint16_t coreid;
+ volatile uint8_t ready; /* A device is set as ready if the MAC address has been set. */
+ volatile uint8_t remove; /* Device is marked for removal from the data core. */
+ uint32_t virtio_idx; /* Index of virtio device */
+ uint32_t dom_id; /* Domain id of xen guest */
+} __rte_cache_aligned;
+
+/*
+ * Device linked list structure for configuration.
+ */
+struct virtio_net_config_ll
+{
+ struct virtio_net dev; /* Virtio device. */
+ struct virtio_net_config_ll *next; /* Next entry on linked list. */
+};
+
+/*
+ * Information relating to memory regions including offsets to addresses in QEMUs memory file.
+ */
+struct virtio_memory_regions {
+ uint64_t guest_phys_address; /* Base guest physical address of region. */
+ uint64_t guest_phys_address_end; /* End guest physical address of region. */
+ uint64_t memory_size; /* Size of region. */
+ uint64_t userspace_address; /* Base userspace address of region. */
+ uint64_t address_offset; /* Offset of region for address translation. */
+};
+
+/*
+ * Memory structure includes region and mapping information.
+ */
+struct virtio_memory {
+ uint32_t nregions; /* Number of memory regions. */
+ struct virtio_memory_regions regions[0]; /* Memory region information. */
+};
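+
+/*
+ * Note: regions[0] is an old-style flexible array member; a virtio_memory
+ * instance is expected to be allocated with room for nregions region entries
+ * immediately following the structure.
+ */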
+
+/*
+ * Device operations to add/remove device.
+ */
+struct virtio_net_device_ops {
+ int (* new_device)(struct virtio_net *); /* Add device. */
+ void (* destroy_device) (volatile struct virtio_net *); /* Remove device. */
+};
+
+#endif
diff --git a/examples/vhost_xen/xen_vhost.h b/examples/vhost_xen/xen_vhost.h
new file mode 100644
index 00000000..2fc304c7
--- /dev/null
+++ b/examples/vhost_xen/xen_vhost.h
@@ -0,0 +1,148 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _XEN_VHOST_H_
+#define _XEN_VHOST_H_
+
+#include <stdint.h>
+
+#include <rte_ether.h>
+
+#include "virtio-net.h"
+
+#define RTE_LOGTYPE_XENHOST RTE_LOGTYPE_USER1
+
+#define XEN_VM_ROOTNODE_FMT "/local/domain/%d/control/dpdk"
+#define XEN_VM_NODE_FMT "/local/domain/%d/control/dpdk/%s"
+#define XEN_MEMPOOL_SUFFIX "mempool_gref"
+#define XEN_RXVRING_SUFFIX "rx_vring_gref"
+#define XEN_TXVRING_SUFFIX "tx_vring_gref"
+#define XEN_GVA_SUFFIX "mempool_va"
+#define XEN_VRINGFLAG_SUFFIX "vring_flag"
+#define XEN_ADDR_SUFFIX "ether_addr"
+#define VIRTIO_START "event_type_start_"
+
+#define XEN_GREF_SPLITTOKEN ','
+
+#define MAX_XENVIRT_MEMPOOL 16
+#define MAX_VIRTIO 32
+#define MAX_GREF_PER_NODE 64 /* 128 MB memory */
+
+#define PAGE_SIZE 4096
+#define PAGE_PFNNUM (PAGE_SIZE / sizeof(uint32_t))
+
+#define XEN_GNTDEV_FNAME "/dev/xen/gntdev"
+
+/* xen grant reference info in one grant node */
+struct xen_gnt {
+ uint32_t gref; /* grant reference for this node */
+ union {
+ int gref; /* grant reference */
+ uint32_t pfn_num; /* guest pfn number of grant reference */
+ } gref_pfn[PAGE_PFNNUM];
+} __attribute__((__packed__));
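+/*
+ * Layout note: entries in gref_pfn[] alternate, even indices hold grant
+ * references and odd indices hold the matching guest pfn; a gref of 0
+ * terminates the list (see cal_pagenum() and map_gntnode() in
+ * xenstore_parse.c).
+ */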
+
+
+/* structure for mempool or vring node list */
+struct xen_gntnode {
+ uint32_t gnt_num; /* grant reference number */
+ struct xen_gnt *gnt_info; /* grant reference info */
+};
+
+
+struct xen_vring {
+ uint32_t dom_id;
+ uint32_t virtio_idx; /* index of virtio device */
+ void *rxvring_addr; /* mapped virtual address of rxvring */
+ void *txvring_addr; /* mapped virtual address of txvring */
+ uint32_t rxpfn_num; /* number of gpfn for rxvring */
+ uint32_t txpfn_num; /* number of gpfn for txvring */
+ uint32_t *rxpfn_tbl; /* array of rxvring gpfn */
+ uint32_t *txpfn_tbl; /* array of txvring gpfn */
+ uint64_t *rx_pindex; /* index used to release rx grefs */
+ uint64_t *tx_pindex; /* index used to release tx grefs */
+ uint64_t flag_index;
+ uint8_t *flag; /* cleared to zero on guest unmap */
+ struct ether_addr addr; /* ethernet address of virtio device */
+ uint8_t removed;
+
+};
+
+struct xen_mempool {
+ uint32_t dom_id; /* guest domain id */
+ uint32_t pool_idx; /* index of memory pool */
+ void *gva; /* guest virtual address of mbuf pool */
+ void *hva; /* host virtual address of mbuf pool */
+ uint32_t mempfn_num; /* number of gpfn for mbuf pool */
+ uint32_t *mempfn_tbl; /* array of mbuf pool gpfn */
+ uint64_t *pindex; /* index used to release grefs */
+};
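+
+/*
+ * gva is the pool address as published by the guest (read from the
+ * "<idx>_mempool_va" xenstore node in parse_mpool_va()), while hva is where
+ * the host maps the same pages (map_mempoolnode()); assuming both mappings
+ * cover the pool pages in the same order, guest buffer addresses can be
+ * translated with the constant offset hva - gva.
+ */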
+
+struct xen_guest {
+ TAILQ_ENTRY(xen_guest) next;
+ int32_t dom_id; /* guest domain id */
+ uint32_t pool_num; /* number of mbuf pool of the guest */
+ uint32_t vring_num; /* number of virtio ports of the guest */
+ /* array containing the guest mbuf pool info */
+ struct xen_mempool mempool[MAX_XENVIRT_MEMPOOL];
+ /* array containing the guest rx/tx vring info */
+ struct xen_vring vring[MAX_VIRTIO];
+};
+
+TAILQ_HEAD(xen_guestlist, xen_guest);
+
+int
+parse_mempoolnode(struct xen_guest *guest);
+
+int
+xenhost_init(void);
+
+int
+parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx);
+
+void
+cleanup_mempool(struct xen_mempool *mempool);
+
+void
+cleanup_vring(struct xen_vring *vring);
+
+void
+virtio_monitor_loop(void);
+
+int
+init_virtio_xen(struct virtio_net_device_ops const * const);
+
+#endif
diff --git a/examples/vhost_xen/xenstore_parse.c b/examples/vhost_xen/xenstore_parse.c
new file mode 100644
index 00000000..26d24320
--- /dev/null
+++ b/examples/vhost_xen/xenstore_parse.c
@@ -0,0 +1,775 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <xen/sys/gntalloc.h>
+#include <xen/sys/gntdev.h>
+#include <xen/xen-compat.h>
+#if __XEN_LATEST_INTERFACE_VERSION__ < 0x00040200
+#include <xs.h>
+#else
+#include <xenstore.h>
+#endif
+
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_malloc.h>
+#include <rte_string_fns.h>
+#include <rte_log.h>
+#include <rte_debug.h>
+
+#include "xen_vhost.h"
+
+/* xenstore handle */
+static struct xs_handle *xs = NULL;
+
+/* gntdev file descriptor to map grant pages */
+static int d_fd = -1;
+
+/*
+ * The grant node format in xenstore for vring/mpool is like:
+ * idx#_rx_vring_gref = "gref1#, gref2#, gref3#"
+ * idx#_mempool_gref = "gref1#, gref2#, gref3#"
+ * each gref# is the grant reference for a shared page.
+ * In each shared page, we store the grant_node_item items.
+ */
+struct grant_node_item {
+ uint32_t gref;
+ uint32_t pfn;
+} __attribute__((packed));
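+
+/*
+ * Illustrative example (all values below are made up): for guest domain 5,
+ * the nodes for virtio device 0 and mempool 0 could look like
+ * /local/domain/5/control/dpdk/0_rx_vring_gref = "839,840"
+ * /local/domain/5/control/dpdk/0_mempool_gref = "841,842,843"
+ * where each listed gref names a shared page filled with grant_node_item
+ * entries.
+ */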
+
+int cmdline_parse_etheraddr(void *tk, const char *srcbuf,
+ void *res, unsigned ressize);
+
+/* Map grant ref refid at addr_ori */
+static void *
+xen_grant_mmap(void *addr_ori, int domid, int refid, uint64_t *pindex)
+{
+ struct ioctl_gntdev_map_grant_ref arg;
+ void *addr = NULL;
+ int pg_sz = getpagesize();
+
+ arg.count = 1;
+ arg.refs[0].domid = domid;
+ arg.refs[0].ref = refid;
+
+ int rv = ioctl(d_fd, IOCTL_GNTDEV_MAP_GRANT_REF, &arg);
+ if (rv) {
+ RTE_LOG(ERR, XENHOST, " %s: (%d,%d) %s (ioctl failed)\n", __func__,
+ domid, refid, strerror(errno));
+ return NULL;
+ }
+
+ if (addr_ori == NULL)
+ addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED,
+ d_fd, arg.index);
+ else
+ addr = mmap(addr_ori, pg_sz, PROT_READ|PROT_WRITE, MAP_SHARED | MAP_FIXED,
+ d_fd, arg.index);
+
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, XENHOST, " %s: (%d, %d) %s (map failed)\n", __func__,
+ domid, refid, strerror(errno));
+ return NULL;
+ }
+
+ if (pindex)
+ *pindex = arg.index;
+
+ return addr;
+}
+
+/* Unmap one grant ref; munmap() must have been called on the mapping first. */
+static int
+xen_unmap_grant_ref(uint64_t index)
+{
+ struct ioctl_gntdev_unmap_grant_ref arg;
+ int rv;
+
+ arg.count = 1;
+ arg.index = index;
+ rv = ioctl(d_fd, IOCTL_GNTDEV_UNMAP_GRANT_REF, &arg);
+ if (rv) {
+ RTE_LOG(ERR, XENHOST, " %s: index 0x%" PRIx64 " unmap failed\n", __func__, index);
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Reserve a virtual address space.
+ * On success, returns the pointer. On failure, returns NULL.
+ */
+static void *
+get_xen_virtual(size_t size, size_t page_sz)
+{
+ void *addr;
+ uintptr_t aligned_addr;
+
+ addr = mmap(NULL, size + page_sz, PROT_READ, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED) {
+ RTE_LOG(ERR, XENHOST, "failed get a virtual area\n");
+ return NULL;
+ }
+
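+ /*
+ * Trim the unaligned head and the surplus tail of the over-sized mapping so
+ * that exactly "size" bytes remain, starting at a page_sz-aligned address.
+ */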
+ aligned_addr = RTE_ALIGN_CEIL((uintptr_t)addr, page_sz);
+ munmap(addr, aligned_addr - (uintptr_t)addr);
+ munmap((void *)(aligned_addr + size), page_sz + (uintptr_t)addr - aligned_addr);
+ addr = (void *)(aligned_addr);
+
+ return addr;
+}
+
+static void
+free_xen_virtual(void *addr, size_t size, size_t page_sz __rte_unused)
+{
+ if (addr)
+ munmap(addr, size);
+}
+
+/*
+ * Read the value string for a key from xenstore.
+ * @param path
+ *  Full path string for the key
+ * @param len
+ *  Output: length of the value read
+ * @return
+ *  Pointer to the value string, NULL on failure
+ */
+static char *
+xen_read_node(char *path, uint32_t *len)
+{
+ char *buf;
+
+ buf = xs_read(xs, XBT_NULL, path, len);
+ return buf;
+}
+
+static int
+cal_pagenum(struct xen_gnt *gnt)
+{
+ unsigned int i;
+ /*
+ * The items in the page are laid out as
+ * gref#,pfn#,...,gref#,pfn#
+ * FIXME: gref 0 is reserved by the system, so it is used as the terminator here.
+ */
+ for (i = 0; i < (PAGE_PFNNUM) / 2; i++) {
+ if (gnt->gref_pfn[i * 2].gref <= 0)
+ break;
+ }
+
+ return i;
+}
+
+/* Frees memory allocated to a grant node */
+static void
+xen_free_gntnode(struct xen_gntnode *gntnode)
+{
+ if (gntnode == NULL)
+ return;
+ free(gntnode->gnt_info);
+ free(gntnode);
+}
+
+/*
+ * Parse a grant node.
+ * @param dom_id
+ *  Guest domain id.
+ * @param path
+ *  Full path string for a grant node, e.g. for the (key, val) pair
+ *  idx#_mempool_gref = "gref#, gref#, gref#"
+ *  the path is 'local/domain/domid/control/dpdk/idx#_mempool_gref';
+ *  each gref# names a shared page containing packed (gref, pfn) entries.
+ * @return
+ *  Pointer to the parsed xen_gntnode, NULL on failure
+ */
+static struct xen_gntnode *
+parse_gntnode(int dom_id, char *path)
+{
+ char **gref_list = NULL;
+ uint32_t i, len, gref_num;
+ void *addr = NULL;
+ char *buf = NULL;
+ struct xen_gntnode *gntnode = NULL;
+ struct xen_gnt *gnt = NULL;
+ int pg_sz = getpagesize();
+ char *end;
+ uint64_t index;
+
+ if ((buf = xen_read_node(path, &len)) == NULL)
+ goto err;
+
+ gref_list = malloc(MAX_GREF_PER_NODE * sizeof(char *));
+ if (gref_list == NULL)
+ goto err;
+
+ gref_num = rte_strsplit(buf, len, gref_list, MAX_GREF_PER_NODE,
+ XEN_GREF_SPLITTOKEN);
+ if (gref_num == 0) {
+ RTE_LOG(ERR, XENHOST, " %s: invalid grant node format\n", __func__);
+ goto err;
+ }
+
+ gntnode = calloc(1, sizeof(struct xen_gntnode));
+ gnt = calloc(gref_num, sizeof(struct xen_gnt));
+ if (gnt == NULL || gntnode == NULL)
+ goto err;
+
+ for (i = 0; i < gref_num; i++) {
+ errno = 0;
+ gnt[i].gref = strtol(gref_list[i], &end, 0);
+ if (errno != 0 || end == NULL || end == gref_list[i] ||
+ (*end != '\0' && *end != XEN_GREF_SPLITTOKEN)) {
+ RTE_LOG(ERR, XENHOST, " %s: parse grant node item failed\n", __func__);
+ goto err;
+ }
+ addr = xen_grant_mmap(NULL, dom_id, gnt[i].gref, &index);
+ if (addr == NULL) {
+ RTE_LOG(ERR, XENHOST, " %s: map gref %u failed\n", __func__, gnt[i].gref);
+ goto err;
+ }
+ RTE_LOG(INFO, XENHOST, " %s: map gref %u to %p\n", __func__, gnt[i].gref, addr);
+ memcpy(gnt[i].gref_pfn, addr, pg_sz);
+ if (munmap(addr, pg_sz)) {
+ RTE_LOG(INFO, XENHOST, " %s: unmap gref %u failed\n", __func__, gnt[i].gref);
+ goto err;
+ }
+ if (xen_unmap_grant_ref(index)) {
+ RTE_LOG(INFO, XENHOST, " %s: release gref %u failed\n", __func__, gnt[i].gref);
+ goto err;
+ }
+
+ }
+
+ gntnode->gnt_num = gref_num;
+ gntnode->gnt_info = gnt;
+
+ free(buf);
+ free(gref_list);
+ return gntnode;
+
+err:
+ free(gnt);
+ free(gntnode);
+ free(gref_list);
+ free(buf);
+ return NULL;
+}
+
+/*
+ * This function maps the grant node of a vring or mbuf pool into a contiguous
+ * virtual address space and returns the mapped address, pfn array and index array.
+ * @param gntnode
+ * Pointer to grant node
+ * @param domid
+ * Guest domain id
+ * @param ppfn
+ * Pointer to pfn array, caller should free this array
+ * @param pgs
+ * Pointer to number of pages
+ * @param ppindex
+ * Pointer to index array, used to release grefs when freeing this node
+ * @return
+ * Pointer to mapped virtual address, NULL on failure
+ */
+static void *
+map_gntnode(struct xen_gntnode *gntnode, int domid, uint32_t **ppfn, uint32_t *pgs, uint64_t **ppindex)
+{
+ struct xen_gnt *gnt;
+ uint32_t i, j;
+ size_t total_pages = 0;
+ void *addr;
+ uint32_t *pfn;
+ uint64_t *pindex;
+ uint32_t pfn_num = 0;
+ int pg_sz;
+
+ if (gntnode == NULL)
+ return NULL;
+
+ pg_sz = getpagesize();
+ for (i = 0; i < gntnode->gnt_num; i++) {
+ gnt = gntnode->gnt_info + i;
+ total_pages += cal_pagenum(gnt);
+ }
+ if ((addr = get_xen_virtual(total_pages * pg_sz, pg_sz)) == NULL) {
+ RTE_LOG(ERR, XENHOST, " %s: failed get_xen_virtual\n", __func__);
+ return NULL;
+ }
+ pfn = calloc(total_pages, (size_t)sizeof(uint32_t));
+ pindex = calloc(total_pages, (size_t)sizeof(uint64_t));
+ if (pfn == NULL || pindex == NULL) {
+ free_xen_virtual(addr, total_pages * pg_sz, pg_sz);
+ free(pfn);
+ free(pindex);
+ return NULL;
+ }
+
+ RTE_LOG(INFO, XENHOST, " %s: total pages:%zu, map to [%p, %p]\n", __func__, total_pages, addr, RTE_PTR_ADD(addr, total_pages * pg_sz - 1));
+ for (i = 0; i < gntnode->gnt_num; i++) {
+ gnt = gntnode->gnt_info + i;
+ for (j = 0; j < (PAGE_PFNNUM) / 2; j++) {
+ if ((gnt->gref_pfn[j * 2].gref) <= 0)
+ goto _end;
+ /* alternative: batch map, or go through libxc */
+ if (xen_grant_mmap(RTE_PTR_ADD(addr, pfn_num * pg_sz),
+ domid,
+ gnt->gref_pfn[j * 2].gref,
+ &pindex[pfn_num]) == NULL) {
+ goto mmap_failed;
+ }
+ pfn[pfn_num] = gnt->gref_pfn[j * 2 + 1].pfn_num;
+ pfn_num++;
+ }
+ }
+
+mmap_failed:
+ if (pfn_num)
+ munmap(addr, pfn_num * pg_sz);
+ for (i = 0; i < pfn_num; i++) {
+ xen_unmap_grant_ref(pindex[i]);
+ }
+ free(pindex);
+ free(pfn);
+ return NULL;
+
+_end:
+ if (ppindex)
+ *ppindex = pindex;
+ else
+ free(pindex);
+ if (ppfn)
+ *ppfn = pfn;
+ else
+ free(pfn);
+ if (pgs)
+ *pgs = total_pages;
+
+ return addr;
+}
+
+static int
+parse_mpool_va(struct xen_mempool *mempool)
+{
+ char path[PATH_MAX] = {0};
+ char *buf;
+ uint32_t len;
+ char *end;
+ int ret = -1;
+
+ errno = 0;
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_GVA_SUFFIX,
+ mempool->dom_id, mempool->pool_idx);
+
+ if ((buf = xen_read_node(path, &len)) == NULL)
+ goto out;
+ mempool->gva = (void *)strtoul(buf, &end, 16);
+ if (errno != 0 || end == NULL || end == buf || *end != '\0') {
+ mempool->gva = NULL;
+ goto out;
+ }
+ ret = 0;
+out:
+ free(buf);
+ return ret;
+}
+
+/*
+ * map mbuf pool
+ */
+static int
+map_mempoolnode(struct xen_gntnode *gntnode,
+ struct xen_mempool *mempool)
+{
+ if (gntnode == NULL || mempool == NULL)
+ return -1;
+
+ mempool->hva =
+ map_gntnode(gntnode, mempool->dom_id, &mempool->mempfn_tbl, &mempool->mempfn_num, &mempool->pindex);
+
+ RTE_LOG(INFO, XENHOST, " %s: map mempool at %p\n", __func__, (void *)mempool->hva);
+ if (mempool->hva)
+ return 0;
+ else {
+ return -1;
+ }
+}
+
+void
+cleanup_mempool(struct xen_mempool *mempool)
+{
+ int pg_sz = getpagesize();
+ uint32_t i;
+
+ if (mempool->hva)
+ munmap(mempool->hva, mempool->mempfn_num * pg_sz);
+ mempool->hva = NULL;
+
+ if (mempool->pindex) {
+ RTE_LOG(INFO, XENHOST, " %s: unmap dom %02u mempool%02u %u grefs\n",
+ __func__,
+ mempool->dom_id,
+ mempool->pool_idx,
+ mempool->mempfn_num);
+ for (i = 0; i < mempool->mempfn_num; i ++) {
+ xen_unmap_grant_ref(mempool->pindex[i]);
+ }
+ }
+ mempool->pindex = NULL;
+
+ free(mempool->mempfn_tbl);
+ mempool->mempfn_tbl = NULL;
+}
+
+/*
+ * Process mempool nodes idx#_mempool_gref, idx = 0, 1, 2...
+ * until we encounter a node that doesn't exist.
+ */
+int
+parse_mempoolnode(struct xen_guest *guest)
+{
+ uint32_t i, len;
+ char path[PATH_MAX] = {0};
+ struct xen_gntnode *gntnode = NULL;
+ struct xen_mempool *mempool = NULL;
+ char *buf;
+
+ bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
+ guest->pool_num = 0;
+
+ while (1) {
+ /* check if null terminated */
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_MEMPOOL_SUFFIX,
+ guest->dom_id,
+ guest->pool_num);
+
+ if ((buf = xen_read_node(path, &len)) != NULL) {
+ /* this node exists */
+ free(buf);
+ } else {
+ if (guest->pool_num == 0) {
+ RTE_LOG(ERR, XENHOST, "no mempool found\n");
+ return -1;
+ }
+ break;
+ }
+
+ mempool = &guest->mempool[guest->pool_num];
+ mempool->dom_id = guest->dom_id;
+ mempool->pool_idx = guest->pool_num;
+
+ RTE_LOG(INFO, XENHOST, " %s: mempool %u parse gntnode %s\n", __func__, guest->pool_num, path);
+ gntnode = parse_gntnode(guest->dom_id, path);
+ if (gntnode == NULL)
+ goto err;
+
+ if (parse_mpool_va(mempool))
+ goto err;
+
+ RTE_LOG(INFO, XENHOST, " %s: mempool %u map gntnode %s\n", __func__, guest->pool_num, path);
+ if (map_mempoolnode(gntnode, mempool))
+ goto err;
+
+ xen_free_gntnode(gntnode);
+ guest->pool_num++;
+ }
+
+ return 0;
+err:
+ if (gntnode)
+ xen_free_gntnode(gntnode);
+ for (i = 0; i < MAX_XENVIRT_MEMPOOL ; i++) {
+ cleanup_mempool(&guest->mempool[i]);
+ }
+ /* reinitialise mempool */
+ bzero(&guest->mempool, MAX_XENVIRT_MEMPOOL * sizeof(guest->mempool[0]));
+ return -1;
+}
+
+static int
+xen_map_vringflag(struct xen_vring *vring)
+{
+ char path[PATH_MAX] = {0};
+ char *buf;
+ uint32_t len, gref;
+ int pg_sz = getpagesize();
+ char *end;
+
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_VRINGFLAG_SUFFIX,
+ vring->dom_id, vring->virtio_idx);
+
+ if ((buf = xen_read_node(path, &len)) == NULL)
+ goto err;
+
+ errno = 0;
+ gref = strtol(buf, &end, 0);
+ if (errno != 0 || end == NULL || end == buf) {
+ goto err;
+ }
+ vring->flag = xen_grant_mmap(0, vring->dom_id, gref, &vring->flag_index);
+ if (vring->flag == NULL || *vring->flag == 0)
+ goto err;
+
+ free(buf);
+ return 0;
+err:
+ free(buf);
+ if (vring->flag) {
+ munmap(vring->flag, pg_sz);
+ vring->flag = NULL;
+ xen_unmap_grant_ref(vring->flag_index);
+ }
+ return -1;
+}
+
+
+static int
+xen_map_rxvringnode(struct xen_gntnode *gntnode,
+ struct xen_vring *vring)
+{
+ vring->rxvring_addr =
+ map_gntnode(gntnode, vring->dom_id, &vring->rxpfn_tbl, &vring->rxpfn_num, &vring->rx_pindex);
+ RTE_LOG(INFO, XENHOST, " %s: map rx vring at %p\n", __func__, (void *)vring->rxvring_addr);
+ if (vring->rxvring_addr)
+ return 0;
+ else
+ return -1;
+}
+
+static int
+xen_map_txvringnode(struct xen_gntnode *gntnode,
+ struct xen_vring *vring)
+{
+ vring->txvring_addr =
+ map_gntnode(gntnode, vring->dom_id, &vring->txpfn_tbl, &vring->txpfn_num, &vring->tx_pindex);
+ RTE_LOG(INFO, XENHOST, " %s: map tx vring at %p\n", __func__, (void *)vring->txvring_addr);
+ if (vring->txvring_addr)
+ return 0;
+ else
+ return -1;
+}
+
+void
+cleanup_vring(struct xen_vring *vring)
+{
+ int pg_sz = getpagesize();
+ uint32_t i;
+
+ RTE_LOG(INFO, XENHOST, " %s: cleanup dom %u vring %u\n", __func__, vring->dom_id, vring->virtio_idx);
+ if (vring->rxvring_addr) {
+ munmap(vring->rxvring_addr, vring->rxpfn_num * pg_sz);
+ RTE_LOG(INFO, XENHOST, " %s: unmap rx vring [%p, %p]\n",
+ __func__,
+ vring->rxvring_addr,
+ RTE_PTR_ADD(vring->rxvring_addr,
+ vring->rxpfn_num * pg_sz - 1));
+ }
+ vring->rxvring_addr = NULL;
+
+
+ if (vring->rx_pindex) {
+ RTE_LOG(INFO, XENHOST, " %s: unmap rx vring %u grefs\n", __func__, vring->rxpfn_num);
+ for (i = 0; i < vring->rxpfn_num; i++) {
+ xen_unmap_grant_ref(vring->rx_pindex[i]);
+ }
+ }
+ vring->rx_pindex = NULL;
+
+ free(vring->rxpfn_tbl);
+ vring->rxpfn_tbl = NULL;
+
+ if (vring->txvring_addr) {
+ munmap(vring->txvring_addr, vring->txpfn_num * pg_sz);
+ RTE_LOG(INFO, XENHOST, " %s: unmap tx vring [%p, %p]\n",
+ __func__,
+ vring->txvring_addr,
+ RTE_PTR_ADD(vring->txvring_addr,
+ vring->txpfn_num * pg_sz - 1));
+ }
+ vring->txvring_addr = NULL;
+
+ if (vring->tx_pindex) {
+ RTE_LOG(INFO, XENHOST, " %s: unmap tx vring %u grefs\n", __func__, vring->txpfn_num);
+ for (i = 0; i < vring->txpfn_num; i++) {
+ xen_unmap_grant_ref(vring->tx_pindex[i]);
+ }
+ }
+ vring->tx_pindex = NULL;
+
+ free(vring->txpfn_tbl);
+ vring->txpfn_tbl = NULL;
+
+ if (vring->flag) {
+ if (!munmap((void *)vring->flag, pg_sz))
+ RTE_LOG(INFO, XENHOST, " %s: unmap flag page at %p\n", __func__, vring->flag);
+ if (!xen_unmap_grant_ref(vring->flag_index))
+ RTE_LOG(INFO, XENHOST, " %s: release flag ref index 0x%" PRIx64 "\n", __func__, vring->flag_index);
+ }
+ vring->flag = NULL;
+ return;
+}
+
+
+
+static int
+xen_parse_etheraddr(struct xen_vring *vring)
+{
+ char path[PATH_MAX] = {0};
+ char *buf;
+ uint32_t len;
+ int ret = -1;
+
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_ADDR_SUFFIX,
+ vring->dom_id, vring->virtio_idx);
+
+ if ((buf = xen_read_node(path, &len)) == NULL)
+ goto out;
+
+ if (cmdline_parse_etheraddr(NULL, buf, &vring->addr,
+ sizeof(vring->addr)) < 0)
+ goto out;
+ ret = 0;
+out:
+ free(buf);
+ return ret;
+}
+
+
+int
+parse_vringnode(struct xen_guest *guest, uint32_t virtio_idx)
+{
+ char path[PATH_MAX] = {0};
+ struct xen_gntnode *rx_gntnode = NULL;
+ struct xen_gntnode *tx_gntnode = NULL;
+ struct xen_vring *vring = NULL;
+
+ /* check if null terminated */
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_RXVRING_SUFFIX,
+ guest->dom_id,
+ virtio_idx);
+
+ RTE_LOG(INFO, XENHOST, " %s: virtio %u parse rx gntnode %s\n", __func__, virtio_idx, path);
+ rx_gntnode = parse_gntnode(guest->dom_id, path);
+ if (rx_gntnode == NULL)
+ goto err;
+
+ /* check if null terminated */
+ snprintf(path, sizeof(path),
+ XEN_VM_ROOTNODE_FMT"/%d_"XEN_TXVRING_SUFFIX,
+ guest->dom_id,
+ virtio_idx);
+
+ RTE_LOG(INFO, XENHOST, " %s: virtio %u parse tx gntnode %s\n", __func__, virtio_idx, path);
+ tx_gntnode = parse_gntnode(guest->dom_id, path);
+ if (tx_gntnode == NULL)
+ goto err;
+
+ vring = &guest->vring[virtio_idx];
+ bzero(vring, sizeof(*vring));
+ vring->dom_id = guest->dom_id;
+ vring->virtio_idx = virtio_idx;
+
+ if (xen_parse_etheraddr(vring) != 0)
+ goto err;
+
+ RTE_LOG(INFO, XENHOST, " %s: virtio %u map rx gntnode %s\n", __func__, virtio_idx, path);
+ if (xen_map_rxvringnode(rx_gntnode, vring) != 0)
+ goto err;
+
+ RTE_LOG(INFO, XENHOST, " %s: virtio %u map tx gntnode %s\n", __func__, virtio_idx, path);
+ if (xen_map_txvringnode(tx_gntnode, vring) != 0)
+ goto err;
+
+ if (xen_map_vringflag(vring) != 0)
+ goto err;
+
+ guest->vring_num++;
+
+ xen_free_gntnode(rx_gntnode);
+ xen_free_gntnode(tx_gntnode);
+
+ return 0;
+
+err:
+ if (rx_gntnode)
+ xen_free_gntnode(rx_gntnode);
+ if (tx_gntnode)
+ xen_free_gntnode(tx_gntnode);
+ if (vring) {
+ cleanup_vring(vring);
+ bzero(vring, sizeof(*vring));
+ }
+ return -1;
+}
+
+/*
+ * Open xen grant dev driver
+ * @return
+ * 0 on success, -1 on failure.
+ */
+static int
+xen_grant_init(void)
+{
+ d_fd = open(XEN_GNTDEV_FNAME, O_RDWR);
+
+ return d_fd == -1 ? -1 : 0;
+}
+
+/*
+ * Initialise xenstore handle and open grant dev driver.
+ * @return
+ * 0 on success, -1 on failure.
+ */
+int
+xenhost_init(void)
+{
+ xs = xs_daemon_open();
+ if (xs == NULL) {
+ rte_panic("failed to initialize xenstore daemon handle");
+ return -1;
+ }
+ if (xen_grant_init())
+ return -1;
+ return 0;
+}
diff --git a/examples/vm_power_manager/Makefile b/examples/vm_power_manager/Makefile
new file mode 100644
index 00000000..59a96417
--- /dev/null
+++ b/examples/vm_power_manager/Makefile
@@ -0,0 +1,65 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifneq ($(shell pkg-config --atleast-version=0.9.3 libvirt; echo $$?), 0)
+$(error vm_power_manager requires libvirt >= 0.9.3)
+else
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vm_power_mgr
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vm_power_cli.c power_manager.c channel_manager.c
+SRCS-y += channel_monitor.c
+
+CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/
+CFLAGS += $(WERROR_FLAGS)
+
+LDLIBS += -lvirt
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
+
+endif # libvirt check
diff --git a/examples/vm_power_manager/channel_manager.c b/examples/vm_power_manager/channel_manager.c
new file mode 100644
index 00000000..22c2ddd5
--- /dev/null
+++ b/examples/vm_power_manager/channel_manager.c
@@ -0,0 +1,805 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <dirent.h>
+#include <errno.h>
+
+#include <sys/queue.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/select.h>
+
+#include <rte_malloc.h>
+#include <rte_memory.h>
+#include <rte_mempool.h>
+#include <rte_log.h>
+#include <rte_atomic.h>
+#include <rte_spinlock.h>
+
+#include <libvirt/libvirt.h>
+
+#include "channel_manager.h"
+#include "channel_commands.h"
+#include "channel_monitor.h"
+
+
+#define RTE_LOGTYPE_CHANNEL_MANAGER RTE_LOGTYPE_USER1
+
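+/*
+ * Expands to a loop header plus a guard: the statement written immediately
+ * after the macro runs once for every set bit i (ascending from 0) in
+ * mask_u64b. The mask is cleared bit by bit as a side effect, so callers
+ * iterate over a scratch copy.
+ */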
+#define ITERATIVE_BITMASK_CHECK_64(mask_u64b, i) \
+ for (i = 0; mask_u64b; mask_u64b &= ~(1ULL << i++)) \
+ if ((mask_u64b >> i) & 1) \
+
+/* Global pointer to libvirt connection */
+static virConnectPtr global_vir_conn_ptr;
+
+static unsigned char *global_cpumaps;
+static virVcpuInfo *global_vircpuinfo;
+static size_t global_maplen;
+
+static unsigned global_n_host_cpus;
+
+/*
+ * Represents a single Virtual Machine
+ */
+struct virtual_machine_info {
+ char name[CHANNEL_MGR_MAX_NAME_LEN];
+ rte_atomic64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS];
+ struct channel_info *channels[CHANNEL_CMDS_MAX_VM_CHANNELS];
+ uint64_t channel_mask;
+ uint8_t num_channels;
+ enum vm_status status;
+ virDomainPtr domainPtr;
+ virDomainInfo info;
+ rte_spinlock_t config_spinlock;
+ LIST_ENTRY(virtual_machine_info) vms_info;
+};
+
+LIST_HEAD(, virtual_machine_info) vm_list_head;
+
+static struct virtual_machine_info *
+find_domain_by_name(const char *name)
+{
+ struct virtual_machine_info *info;
+ LIST_FOREACH(info, &vm_list_head, vms_info) {
+ if (!strncmp(info->name, name, CHANNEL_MGR_MAX_NAME_LEN-1))
+ return info;
+ }
+ return NULL;
+}
+
+static int
+update_pcpus_mask(struct virtual_machine_info *vm_info)
+{
+ virVcpuInfoPtr cpuinfo;
+ unsigned i, j;
+ int n_vcpus;
+ uint64_t mask;
+
+ memset(global_cpumaps, 0, CHANNEL_CMDS_MAX_CPUS*global_maplen);
+
+ if (!virDomainIsActive(vm_info->domainPtr)) {
+ n_vcpus = virDomainGetVcpuPinInfo(vm_info->domainPtr,
+ vm_info->info.nrVirtCpu, global_cpumaps, global_maplen,
+ VIR_DOMAIN_AFFECT_CONFIG);
+ if (n_vcpus < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for "
+ "inactive VM '%s'\n", vm_info->name);
+ return -1;
+ }
+ goto update_pcpus;
+ }
+
+ memset(global_vircpuinfo, 0, sizeof(*global_vircpuinfo)*
+ CHANNEL_CMDS_MAX_CPUS);
+
+ cpuinfo = global_vircpuinfo;
+
+ n_vcpus = virDomainGetVcpus(vm_info->domainPtr, cpuinfo,
+ CHANNEL_CMDS_MAX_CPUS, global_cpumaps, global_maplen);
+ if (n_vcpus < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting vCPU info for "
+ "active VM '%s'\n", vm_info->name);
+ return -1;
+ }
+update_pcpus:
+ if (n_vcpus >= CHANNEL_CMDS_MAX_CPUS) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Number of vCPUs(%u) is out of range "
+ "0...%d\n", n_vcpus, CHANNEL_CMDS_MAX_CPUS-1);
+ return -1;
+ }
+ if (n_vcpus != vm_info->info.nrVirtCpu) {
+ RTE_LOG(INFO, CHANNEL_MANAGER, "Updating the number of vCPUs for VM '%s'"
+ " from %d -> %d\n", vm_info->name, vm_info->info.nrVirtCpu,
+ n_vcpus);
+ vm_info->info.nrVirtCpu = n_vcpus;
+ }
+ for (i = 0; i < vm_info->info.nrVirtCpu; i++) {
+ mask = 0;
+ for (j = 0; j < global_n_host_cpus; j++) {
+ if (VIR_CPU_USABLE(global_cpumaps, global_maplen, i, j) > 0) {
+ mask |= 1ULL << j;
+ }
+ }
+ rte_atomic64_set(&vm_info->pcpu_mask[i], mask);
+ }
+ return 0;
+}
+
+int
+set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask)
+{
+ unsigned i = 0;
+ int flags = VIR_DOMAIN_AFFECT_LIVE|VIR_DOMAIN_AFFECT_CONFIG;
+ struct virtual_machine_info *vm_info;
+ uint64_t mask = core_mask;
+
+ if (vcpu >= CHANNEL_CMDS_MAX_CPUS) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds max allowable(%d)\n",
+ vcpu, CHANNEL_CMDS_MAX_CPUS-1);
+ return -1;
+ }
+
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name);
+ return -1;
+ }
+
+ if (!virDomainIsActive(vm_info->domainPtr)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to set vCPU(%u) to pCPU "
+ "mask(0x%"PRIx64") for VM '%s', VM is not active\n",
+ vcpu, core_mask, vm_info->name);
+ return -1;
+ }
+
+ if (vcpu >= vm_info->info.nrVirtCpu) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "vCPU(%u) exceeds the assigned number of "
+ "vCPUs(%u)\n", vcpu, vm_info->info.nrVirtCpu);
+ return -1;
+ }
+ memset(global_cpumaps, 0 , CHANNEL_CMDS_MAX_CPUS * global_maplen);
+ ITERATIVE_BITMASK_CHECK_64(mask, i) {
+ VIR_USE_CPU(global_cpumaps, i);
+ if (i >= global_n_host_cpus) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "CPU(%u) exceeds the available "
+ "number of CPUs(%u)\n", i, global_n_host_cpus);
+ return -1;
+ }
+ }
+ if (virDomainPinVcpuFlags(vm_info->domainPtr, vcpu, global_cpumaps,
+ global_maplen, flags) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to set vCPU(%u) to pCPU "
+ "mask(0x%"PRIx64") for VM '%s'\n", vcpu, core_mask,
+ vm_info->name);
+ return -1;
+ }
+ rte_atomic64_set(&vm_info->pcpu_mask[vcpu], core_mask);
+ return 0;
+
+}
+
+int
+set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num)
+{
+ uint64_t mask = 1ULL << core_num;
+
+ return set_pcpus_mask(vm_name, vcpu, mask);
+}
+
+uint64_t
+get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu)
+{
+ struct virtual_machine_info *vm_info =
+ (struct virtual_machine_info *)chan_info->priv_info;
+ return rte_atomic64_read(&vm_info->pcpu_mask[vcpu]);
+}
+
+static inline int
+channel_exists(struct virtual_machine_info *vm_info, unsigned channel_num)
+{
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+ if (vm_info->channel_mask & (1ULL << channel_num)) {
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+ return 1;
+ }
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+ return 0;
+}
+
+
+
+static int
+open_non_blocking_channel(struct channel_info *info)
+{
+ int ret, flags;
+ struct sockaddr_un sock_addr;
+ fd_set soc_fd_set;
+ struct timeval tv;
+
+ info->fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (info->fd == -1) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error(%s) creating socket for '%s'\n",
+ strerror(errno),
+ info->channel_path);
+ return -1;
+ }
+ sock_addr.sun_family = AF_UNIX;
+ memcpy(&sock_addr.sun_path, info->channel_path,
+ strlen(info->channel_path)+1);
+
+ /* Get current flags */
+ flags = fcntl(info->fd, F_GETFL, 0);
+ if (flags < 0) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) fcntl get flags on socket for "
+ "'%s'\n", strerror(errno), info->channel_path);
+ return 1;
+ }
+ /* Set to Non Blocking */
+ flags |= O_NONBLOCK;
+ if (fcntl(info->fd, F_SETFL, flags) < 0) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) setting non-blocking "
+ "socket for '%s'\n", strerror(errno), info->channel_path);
+ return -1;
+ }
+ ret = connect(info->fd, (struct sockaddr *)&sock_addr,
+ sizeof(sock_addr));
+ if (ret < 0) {
+ /* ECONNREFUSED error is given when VM is not active */
+ if (errno == ECONNREFUSED) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "VM is not active or has not "
+ "activated its endpoint to channel %s\n",
+ info->channel_path);
+ return -1;
+ }
+ /* Wait for tv_sec if in progress */
+ else if (errno == EINPROGRESS) {
+ tv.tv_sec = 2;
+ tv.tv_usec = 0;
+ FD_ZERO(&soc_fd_set);
+ FD_SET(info->fd, &soc_fd_set);
+ if (select(info->fd+1, NULL, &soc_fd_set, NULL, &tv) > 0) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Timeout or error on channel "
+ "'%s'\n", info->channel_path);
+ return -1;
+ }
+ } else {
+ /* Any other error */
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Error(%s) connecting socket"
+ " for '%s'\n", strerror(errno), info->channel_path);
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int
+setup_channel_info(struct virtual_machine_info **vm_info_dptr,
+ struct channel_info **chan_info_dptr, unsigned channel_num)
+{
+ struct channel_info *chan_info = *chan_info_dptr;
+ struct virtual_machine_info *vm_info = *vm_info_dptr;
+
+ chan_info->channel_num = channel_num;
+ chan_info->priv_info = (void *)vm_info;
+ chan_info->status = CHANNEL_MGR_CHANNEL_DISCONNECTED;
+ if (open_non_blocking_channel(chan_info) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Could not open channel: "
+ "'%s' for VM '%s'\n",
+ chan_info->channel_path, vm_info->name);
+ return -1;
+ }
+ if (add_channel_to_monitor(&chan_info) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Could not add channel: "
+ "'%s' to epoll ctl for VM '%s'\n",
+ chan_info->channel_path, vm_info->name);
+ return -1;
+
+ }
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+ vm_info->num_channels++;
+ vm_info->channel_mask |= 1ULL << channel_num;
+ vm_info->channels[channel_num] = chan_info;
+ chan_info->status = CHANNEL_MGR_CHANNEL_CONNECTED;
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+ return 0;
+}
+
+int
+add_all_channels(const char *vm_name)
+{
+ DIR *d;
+ struct dirent *dir;
+ struct virtual_machine_info *vm_info;
+ struct channel_info *chan_info;
+ char *token, *remaining, *tail_ptr;
+ char socket_name[PATH_MAX];
+ unsigned channel_num;
+ int num_channels_enabled = 0;
+
+ /* verify VM exists */
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' not found"
+ " during channel discovery\n", vm_name);
+ return 0;
+ }
+ if (!virDomainIsActive(vm_info->domainPtr)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name);
+ vm_info->status = CHANNEL_MGR_VM_INACTIVE;
+ return 0;
+ }
+ d = opendir(CHANNEL_MGR_SOCKET_PATH);
+ if (d == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error opening directory '%s': %s\n",
+ CHANNEL_MGR_SOCKET_PATH, strerror(errno));
+ return -1;
+ }
+ while ((dir = readdir(d)) != NULL) {
+ if (!strncmp(dir->d_name, ".", 1) ||
+ !strncmp(dir->d_name, "..", 2))
+ continue;
+
+ snprintf(socket_name, sizeof(socket_name), "%s", dir->d_name);
+ remaining = socket_name;
+ /* Extract vm_name from "<vm_name>.<channel_num>" */
+ token = strsep(&remaining, ".");
+ if (remaining == NULL)
+ continue;
+ if (strncmp(vm_name, token, CHANNEL_MGR_MAX_NAME_LEN))
+ continue;
+
+ /* remaining should contain only <channel_num> */
+ errno = 0;
+ channel_num = (unsigned)strtol(remaining, &tail_ptr, 0);
+ if ((errno != 0) || (remaining[0] == '\0') ||
+ tail_ptr == NULL || (*tail_ptr != '\0')) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Malformed channel name "
+ "'%s' found; it should be in the form "
+ "'<guest_name>.<channel_num>(decimal)'\n",
+ dir->d_name);
+ continue;
+ }
+ if (channel_num >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "Channel number(%u) is "
+ "greater than max allowable: %d, skipping '%s%s'\n",
+ channel_num, CHANNEL_CMDS_MAX_VM_CHANNELS-1,
+ CHANNEL_MGR_SOCKET_PATH, dir->d_name);
+ continue;
+ }
+ /* if channel has not been added previously */
+ if (channel_exists(vm_info, channel_num))
+ continue;
+
+ chan_info = rte_malloc(NULL, sizeof(*chan_info),
+ RTE_CACHE_LINE_SIZE);
+ if (chan_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for "
+ "channel '%s%s'\n", CHANNEL_MGR_SOCKET_PATH, dir->d_name);
+ continue;
+ }
+
+ snprintf(chan_info->channel_path,
+ sizeof(chan_info->channel_path), "%s%s",
+ CHANNEL_MGR_SOCKET_PATH, dir->d_name);
+
+ if (setup_channel_info(&vm_info, &chan_info, channel_num) < 0) {
+ rte_free(chan_info);
+ continue;
+ }
+
+ num_channels_enabled++;
+ }
+ closedir(d);
+ return num_channels_enabled;
+}
+
+int
+add_channels(const char *vm_name, unsigned *channel_list,
+ unsigned len_channel_list)
+{
+ struct virtual_machine_info *vm_info;
+ struct channel_info *chan_info;
+ char socket_path[PATH_MAX];
+ unsigned i;
+ int num_channels_enabled = 0;
+
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' "
+ "not found\n", vm_name);
+ return 0;
+ }
+
+ if (!virDomainIsActive(vm_info->domainPtr)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "VM: '%s' is not active\n", vm_name);
+ vm_info->status = CHANNEL_MGR_VM_INACTIVE;
+ return 0;
+ }
+
+ for (i = 0; i < len_channel_list; i++) {
+
+ if (channel_list[i] >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+ RTE_LOG(INFO, CHANNEL_MANAGER, "Channel(%u) is out of range "
+ "0...%d\n", channel_list[i],
+ CHANNEL_CMDS_MAX_VM_CHANNELS-1);
+ continue;
+ }
+ if (channel_exists(vm_info, channel_list[i])) {
+ RTE_LOG(INFO, CHANNEL_MANAGER, "Channel already exists, skipping "
+ "'%s.%u'\n", vm_name, channel_list[i]);
+ continue;
+ }
+
+ snprintf(socket_path, sizeof(socket_path), "%s%s.%u",
+ CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]);
+ errno = 0;
+ if (access(socket_path, F_OK) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Channel path '%s' error: "
+ "%s\n", socket_path, strerror(errno));
+ continue;
+ }
+ chan_info = rte_malloc(NULL, sizeof(*chan_info),
+ RTE_CACHE_LINE_SIZE);
+ if (chan_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for "
+ "channel '%s'\n", socket_path);
+ continue;
+ }
+ snprintf(chan_info->channel_path,
+ sizeof(chan_info->channel_path), "%s%s.%u",
+ CHANNEL_MGR_SOCKET_PATH, vm_name, channel_list[i]);
+ if (setup_channel_info(&vm_info, &chan_info, channel_list[i]) < 0) {
+ rte_free(chan_info);
+ continue;
+ }
+ num_channels_enabled++;
+
+ }
+ return num_channels_enabled;
+}
+
+int
+remove_channel(struct channel_info **chan_info_dptr)
+{
+ struct virtual_machine_info *vm_info;
+ struct channel_info *chan_info = *chan_info_dptr;
+
+ close(chan_info->fd);
+
+ vm_info = (struct virtual_machine_info *)chan_info->priv_info;
+
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+ vm_info->channel_mask &= ~(1ULL << chan_info->channel_num);
+ vm_info->num_channels--;
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+
+ rte_free(chan_info);
+ return 0;
+}
+
+int
+set_channel_status_all(const char *vm_name, enum channel_status status)
+{
+ struct virtual_machine_info *vm_info;
+ unsigned i;
+ uint64_t mask;
+ int num_channels_changed = 0;
+
+ if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED ||
+ status == CHANNEL_MGR_CHANNEL_DISABLED)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or "
+ "disabled: Unable to change status for VM '%s'\n", vm_name);
+ }
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to disable channels: VM '%s' "
+ "not found\n", vm_name);
+ return 0;
+ }
+
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+ mask = vm_info->channel_mask;
+ ITERATIVE_BITMASK_CHECK_64(mask, i) {
+ vm_info->channels[i]->status = status;
+ num_channels_changed++;
+ }
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+ return num_channels_changed;
+
+}
+
+int
+set_channel_status(const char *vm_name, unsigned *channel_list,
+ unsigned len_channel_list, enum channel_status status)
+{
+ struct virtual_machine_info *vm_info;
+ unsigned i;
+ int num_channels_changed = 0;
+
+ if (!(status == CHANNEL_MGR_CHANNEL_CONNECTED ||
+ status == CHANNEL_MGR_CHANNEL_DISABLED)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Channels can only be enabled or "
+ "disabled: Unable to change status for VM '%s'\n", vm_name);
+ }
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add channels: VM '%s' "
+ "not found\n", vm_name);
+ return 0;
+ }
+ for (i = 0; i < len_channel_list; i++) {
+ if (channel_exists(vm_info, channel_list[i])) {
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+ vm_info->channels[channel_list[i]]->status = status;
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+ num_channels_changed++;
+ }
+ }
+ return num_channels_changed;
+}
+
+int
+get_info_vm(const char *vm_name, struct vm_info *info)
+{
+ struct virtual_machine_info *vm_info;
+ unsigned i, channel_num = 0;
+ uint64_t mask;
+
+ vm_info = find_domain_by_name(vm_name);
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "VM '%s' not found\n", vm_name);
+ return -1;
+ }
+ info->status = CHANNEL_MGR_VM_ACTIVE;
+ if (!virDomainIsActive(vm_info->domainPtr))
+ info->status = CHANNEL_MGR_VM_INACTIVE;
+
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+
+ mask = vm_info->channel_mask;
+ ITERATIVE_BITMASK_CHECK_64(mask, i) {
+ info->channels[channel_num].channel_num = i;
+ memcpy(info->channels[channel_num].channel_path,
+ vm_info->channels[i]->channel_path, UNIX_PATH_MAX);
+ info->channels[channel_num].status = vm_info->channels[i]->status;
+ info->channels[channel_num].fd = vm_info->channels[i]->fd;
+ channel_num++;
+ }
+
+ info->num_channels = channel_num;
+ info->num_vcpus = vm_info->info.nrVirtCpu;
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+
+ memcpy(info->name, vm_info->name, sizeof(vm_info->name));
+ for (i = 0; i < info->num_vcpus; i++) {
+ info->pcpu_mask[i] = rte_atomic64_read(&vm_info->pcpu_mask[i]);
+ }
+ return 0;
+}
+
+int
+add_vm(const char *vm_name)
+{
+ struct virtual_machine_info *new_domain;
+ virDomainPtr dom_ptr;
+ int i;
+
+ if (find_domain_by_name(vm_name) != NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to add VM: VM '%s' "
+ "already exists\n", vm_name);
+ return -1;
+ }
+
+ if (global_vir_conn_ptr == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "No connection to hypervisor exists\n");
+ return -1;
+ }
+ dom_ptr = virDomainLookupByName(global_vir_conn_ptr, vm_name);
+ if (dom_ptr == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error on VM lookup with libvirt: "
+ "VM '%s' not found\n", vm_name);
+ return -1;
+ }
+
+ new_domain = rte_malloc("virtual_machine_info", sizeof(*new_domain),
+ RTE_CACHE_LINE_SIZE);
+ if (new_domain == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to allocate memory for VM "
+ "info\n");
+ return -1;
+ }
+ new_domain->domainPtr = dom_ptr;
+ if (virDomainGetInfo(new_domain->domainPtr, &new_domain->info) != 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to get libvirt VM info\n");
+ rte_free(new_domain);
+ return -1;
+ }
+ if (new_domain->info.nrVirtCpu > CHANNEL_CMDS_MAX_CPUS) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error: the number of virtual CPUs(%u) is "
+ "greater than allowable(%d)\n", new_domain->info.nrVirtCpu,
+ CHANNEL_CMDS_MAX_CPUS);
+ rte_free(new_domain);
+ return -1;
+ }
+
+ for (i = 0; i < CHANNEL_CMDS_MAX_CPUS; i++) {
+ rte_atomic64_init(&new_domain->pcpu_mask[i]);
+ }
+ if (update_pcpus_mask(new_domain) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error getting physical CPU pinning\n");
+ rte_free(new_domain);
+ return -1;
+ }
+ strncpy(new_domain->name, vm_name, sizeof(new_domain->name));
+ new_domain->channel_mask = 0;
+ new_domain->num_channels = 0;
+
+ if (!virDomainIsActive(dom_ptr))
+ new_domain->status = CHANNEL_MGR_VM_INACTIVE;
+ else
+ new_domain->status = CHANNEL_MGR_VM_ACTIVE;
+
+ rte_spinlock_init(&(new_domain->config_spinlock));
+ LIST_INSERT_HEAD(&vm_list_head, new_domain, vms_info);
+ return 0;
+}
+
+int
+remove_vm(const char *vm_name)
+{
+ struct virtual_machine_info *vm_info = find_domain_by_name(vm_name);
+
+ if (vm_info == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM: VM '%s' "
+ "not found\n", vm_name);
+ return -1;
+ }
+ rte_spinlock_lock(&vm_info->config_spinlock);
+ if (vm_info->num_channels != 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to remove VM '%s', there are "
+ "%"PRIu8" channels still active\n",
+ vm_name, vm_info->num_channels);
+ rte_spinlock_unlock(&vm_info->config_spinlock);
+ return -1;
+ }
+ LIST_REMOVE(vm_info, vms_info);
+ rte_spinlock_unlock(&vm_info->config_spinlock);
+ rte_free(vm_info);
+ return 0;
+}
+
+static void
+disconnect_hypervisor(void)
+{
+ if (global_vir_conn_ptr != NULL) {
+ virConnectClose(global_vir_conn_ptr);
+ global_vir_conn_ptr = NULL;
+ }
+}
+
+static int
+connect_hypervisor(const char *path)
+{
+ if (global_vir_conn_ptr != NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error connecting to %s, connection "
+ "already established\n", path);
+ return -1;
+ }
+ global_vir_conn_ptr = virConnectOpen(path);
+ if (global_vir_conn_ptr == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error failed to open connection to "
+ "Hypervisor '%s'\n", path);
+ return -1;
+ }
+ return 0;
+}
+
+int
+channel_manager_init(const char *path)
+{
+ virNodeInfo info;
+
+ LIST_INIT(&vm_list_head);
+ if (connect_hypervisor(path) < 0) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to initialize channel manager\n");
+ return -1;
+ }
+
+ global_maplen = VIR_CPU_MAPLEN(CHANNEL_CMDS_MAX_CPUS);
+
+ global_vircpuinfo = rte_zmalloc(NULL, sizeof(*global_vircpuinfo) *
+ CHANNEL_CMDS_MAX_CPUS, RTE_CACHE_LINE_SIZE);
+ if (global_vircpuinfo == NULL) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Error allocating memory for CPU Info\n");
+ goto error;
+ }
+ global_cpumaps = rte_zmalloc(NULL, CHANNEL_CMDS_MAX_CPUS * global_maplen,
+ RTE_CACHE_LINE_SIZE);
+ if (global_cpumaps == NULL) {
+ goto error;
+ }
+
+ if (virNodeGetInfo(global_vir_conn_ptr, &info)) {
+ RTE_LOG(ERR, CHANNEL_MANAGER, "Unable to retrieve node Info\n");
+ goto error;
+ }
+
+ global_n_host_cpus = (unsigned)info.cpus;
+
+ if (global_n_host_cpus > CHANNEL_CMDS_MAX_CPUS) {
+ RTE_LOG(WARNING, CHANNEL_MANAGER, "The number of host CPUs(%u) exceeds the "
+ "maximum of %u. No cores over %u should be used.\n",
+ global_n_host_cpus, CHANNEL_CMDS_MAX_CPUS,
+ CHANNEL_CMDS_MAX_CPUS - 1);
+ global_n_host_cpus = CHANNEL_CMDS_MAX_CPUS;
+ }
+
+ return 0;
+error:
+ disconnect_hypervisor();
+ return -1;
+}
+
+void
+channel_manager_exit(void)
+{
+ unsigned i;
+ uint64_t mask;
+ struct virtual_machine_info *vm_info;
+
+ LIST_FOREACH(vm_info, &vm_list_head, vms_info) {
+
+ rte_spinlock_lock(&(vm_info->config_spinlock));
+
+ mask = vm_info->channel_mask;
+ ITERATIVE_BITMASK_CHECK_64(mask, i) {
+ remove_channel_from_monitor(vm_info->channels[i]);
+ close(vm_info->channels[i]->fd);
+ rte_free(vm_info->channels[i]);
+ }
+ rte_spinlock_unlock(&(vm_info->config_spinlock));
+
+ LIST_REMOVE(vm_info, vms_info);
+ rte_free(vm_info);
+ }
+
+ rte_free(global_cpumaps);
+ rte_free(global_vircpuinfo);
+ disconnect_hypervisor();
+}
diff --git a/examples/vm_power_manager/channel_manager.h b/examples/vm_power_manager/channel_manager.h
new file mode 100644
index 00000000..67e26ecb
--- /dev/null
+++ b/examples/vm_power_manager/channel_manager.h
@@ -0,0 +1,320 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CHANNEL_MANAGER_H_
+#define CHANNEL_MANAGER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <linux/limits.h>
+#include <sys/un.h>
+#include <rte_atomic.h>
+#include "channel_commands.h"
+
+/* Maximum name length including '\0' terminator */
+#define CHANNEL_MGR_MAX_NAME_LEN 64
+
+/* Maximum number of channels to each Virtual Machine */
+#define CHANNEL_MGR_MAX_CHANNELS 64
+
+/* Hypervisor Path for libvirt(qemu/KVM) */
+#define CHANNEL_MGR_DEFAULT_HV_PATH "qemu:///system"
+
+/* File socket directory */
+#define CHANNEL_MGR_SOCKET_PATH "/tmp/powermonitor/"
+
+#ifndef UNIX_PATH_MAX
+struct sockaddr_un _sockaddr_un;
+#define UNIX_PATH_MAX sizeof(_sockaddr_un.sun_path)
+#endif
+
+/* Communication Channel Status */
+enum channel_status { CHANNEL_MGR_CHANNEL_DISCONNECTED = 0,
+ CHANNEL_MGR_CHANNEL_CONNECTED,
+ CHANNEL_MGR_CHANNEL_DISABLED,
+ CHANNEL_MGR_CHANNEL_PROCESSING};
+
+/* VM libvirt(qemu/KVM) connection status */
+enum vm_status { CHANNEL_MGR_VM_INACTIVE = 0, CHANNEL_MGR_VM_ACTIVE};
+
+/*
+ * Represents a single and exclusive VM channel that exists between a guest and
+ * the host.
+ */
+struct channel_info {
+ char channel_path[UNIX_PATH_MAX]; /**< Path to host socket */
+ volatile uint32_t status; /**< Connection status(enum channel_status) */
+ int fd; /**< AF_UNIX socket fd */
+ unsigned channel_num; /**< CHANNEL_MGR_SOCKET_PATH/<vm_name>.channel_num */
+ void *priv_info; /**< Pointer to private info, do not modify */
+};
+
+/* Represents a single VM instance used to return internal information about
+ * a VM */
+struct vm_info {
+ char name[CHANNEL_MGR_MAX_NAME_LEN]; /**< VM name */
+ enum vm_status status; /**< libvirt status */
+ uint64_t pcpu_mask[CHANNEL_CMDS_MAX_CPUS]; /**< pCPU mask for each vCPU */
+ unsigned num_vcpus; /**< number of vCPUS */
+ struct channel_info channels[CHANNEL_MGR_MAX_CHANNELS]; /**< Array of channel_info */
+ unsigned num_channels; /**< Number of channels */
+};
+
+/**
+ * Initialize the Channel Manager resources and connect to the Hypervisor
+ * specified in path.
+ * This must be called successfully before any other function in this header.
+ * It must only be called once.
+ *
+ * @param path
+ * Must be a local path, e.g. qemu:///system.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int channel_manager_init(const char *path);
+
+/**
+ * Free resources associated with the Channel Manager.
+ *
+ * @return
+ * None
+ */
+void channel_manager_exit(void);
+
+/**
+ * Get the Physical CPU mask for the vCPU of the VM associated with chan_info.
+ * The mask is returned by this function rather than through an out parameter.
+ * It is not thread-safe.
+ *
+ * @param chan_info
+ * Pointer to struct channel_info
+ *
+ * @param vcpu
+ * The virtual CPU to query.
+ *
+ *
+ * @return
+ * - 0 on error.
+ * - >0 on success.
+ */
+uint64_t get_pcpus_mask(struct channel_info *chan_info, unsigned vcpu);
+
+/**
+ * Set the Physical CPU mask for the specified vCPU.
+ * It is not thread-safe.
+ *
+ * @param vm_name
+ * Virtual Machine name to look up.
+ *
+ * @param vcpu
+ * The virtual CPU to set.
+ *
+ * @param core_mask
+ * The core mask of the physical CPU(s) to bind the vCPU to.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int set_pcpus_mask(char *vm_name, unsigned vcpu, uint64_t core_mask);
+
+/**
+ * Set the Physical CPU for the specified vCPU.
+ * It is not thread-safe.
+ *
+ * @param vm_name
+ * Virtual Machine name to look up.
+ *
+ * @param vcpu
+ * The virtual CPU to set.
+ *
+ * @param core_num
+ * The physical CPU core number to bind the vCPU to.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int set_pcpu(char *vm_name, unsigned vcpu, unsigned core_num);
+
+/**
+ * Add a VM as specified by name to the Channel Manager. The name must
+ * correspond to a valid libvirt domain name.
+ * This is required prior to adding channels.
+ * It is not thread-safe.
+ *
+ * @param name
+ * Virtual Machine name to lookup.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int add_vm(const char *name);
+
+/**
+ * Remove a previously added Virtual Machine from the Channel Manager
+ * It is not thread-safe.
+ *
+ * @param name
+ * Virtual Machine name to lookup.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int remove_vm(const char *name);
+
+/**
+ * Add all available channels to the VM as specified by name.
+ * Only channels whose socket paths match
+ * CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number> are parsed.
+ * It is not thread-safe.
+ *
+ * @param vm_name
+ * Virtual Machine name to look up.
+ *
+ * @return
+ * - N the number of channels added for the VM
+ */
+int add_all_channels(const char *vm_name);
+
+/**
+ * Add the channel numbers in channel_list to the domain specified by name.
+ * Only channels whose socket paths match
+ * CHANNEL_MGR_SOCKET_PATH/<vm_name>.<channel_number> are parsed.
+ * It is not thread-safe.
+ *
+ * @param vm_name
+ * Virtual Machine name to add channels to.
+ *
+ * @param channel_list
+ * Pointer to a list of unsigned integers, the channel numbers to add.
+ * It must be allocated outside of this function.
+ *
+ * @param num_channels
+ * The number of channel numbers in channel_list.
+ *
+ * @return
+ * - N the number of channels added for the VM
+ * - 0 for error
+ */
+int add_channels(const char *vm_name, unsigned *channel_list,
+ unsigned num_channels);
+
+/**
+ * Remove a channel definition from the channel manager. This must only be
+ * called from the channel monitor thread.
+ *
+ * @param chan_info
+ * Pointer to a valid struct channel_info.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int remove_channel(struct channel_info **chan_info_dptr);
+
+/**
+ * For all channels associated with a Virtual Machine name, update the
+ * connection status. Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or
+ * CHANNEL_MGR_CHANNEL_DISABLED only.
+ *
+ * @param name
+ * Virtual Machine name whose channels are to be modified.
+ *
+ * @param status
+ * The status to set for each channel.
+ *
+ * @return
+ * - N the number of channels modified for the VM
+ * - 0 for error
+ */
+int set_channel_status_all(const char *name, enum channel_status status);
+
+/**
+ * For all channels in channel_list associated with a Virtual Machine name
+ * update the connection status of each.
+ * Valid states are CHANNEL_MGR_CHANNEL_CONNECTED or
+ * CHANNEL_MGR_CHANNEL_DISABLED only.
+ * It is not thread-safe.
+ *
+ * @param vm_name
+ * Virtual Machine name whose channels are to be modified.
+ *
+ * @param channel_list
+ * Pointer to a list of unsigned integers, the channel numbers to modify.
+ * It must be allocated outside of this function.
+ *
+ * @param len_channel_list
+ * The number of channel numbers in channel_list.
+ *
+ * @return
+ * - N the number of channels modified for the VM
+ * - 0 for error
+ */
+int set_channel_status(const char *vm_name, unsigned *channel_list,
+ unsigned len_channel_list, enum channel_status status);
+
+/**
+ * Populate a struct vm_info with the information associated with vm_name.
+ *
+ * @param vm_name
+ * The name of the virtual machine to look up.
+ *
+ * @param info
+ * Pointer to a struct vm_info; it must be allocated prior to calling this
+ * function.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int get_info_vm(const char *vm_name, struct vm_info *info);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CHANNEL_MANAGER_H_ */
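
The channel manager API above follows an init, register, open, query sequence. Below is a minimal editor-added sketch (not part of the patch) showing that flow from a host application; the domain name "ubuntu_vm" and the channel numbers are illustrative assumptions only.

    #include <stdio.h>
    #include "channel_manager.h"

    static int example_setup(void)
    {
        unsigned channels[] = { 0, 1 };   /* illustrative channel numbers */
        struct vm_info info;

        /* Connect to the local hypervisor; must succeed before any other call. */
        if (channel_manager_init(CHANNEL_MGR_DEFAULT_HV_PATH) < 0)
            return -1;

        /* Register the libvirt domain, then open its virtio-serial channels. */
        if (add_vm("ubuntu_vm") < 0) {
            channel_manager_exit();
            return -1;
        }
        if (add_channels("ubuntu_vm", channels, 2) == 0)
            printf("No channels added for 'ubuntu_vm'\n");

        /* Retrieve the state that the show_vm CLI command prints. */
        if (get_info_vm("ubuntu_vm", &info) == 0)
            printf("VM %s: %u vCPU(s), %u channel(s)\n",
                    info.name, info.num_vcpus, info.num_channels);

        channel_manager_exit();
        return 0;
    }
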
diff --git a/examples/vm_power_manager/channel_monitor.c b/examples/vm_power_manager/channel_monitor.c
new file mode 100644
index 00000000..e7f5cc4a
--- /dev/null
+++ b/examples/vm_power_manager/channel_monitor.c
@@ -0,0 +1,233 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <signal.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/queue.h>
+
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+
+
+#include "channel_monitor.h"
+#include "channel_commands.h"
+#include "channel_manager.h"
+#include "power_manager.h"
+
+#define RTE_LOGTYPE_CHANNEL_MONITOR RTE_LOGTYPE_USER1
+
+#define MAX_EVENTS 256
+
+
+static volatile unsigned run_loop = 1;
+static int global_event_fd;
+static struct epoll_event *global_events_list;
+
+void channel_monitor_exit(void)
+{
+ run_loop = 0;
+ rte_free(global_events_list);
+}
+
+static int
+process_request(struct channel_packet *pkt, struct channel_info *chan_info)
+{
+ uint64_t core_mask;
+
+ if (chan_info == NULL)
+ return -1;
+
+ if (rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_CONNECTED,
+ CHANNEL_MGR_CHANNEL_PROCESSING) == 0)
+ return -1;
+
+ if (pkt->command == CPU_POWER) {
+ core_mask = get_pcpus_mask(chan_info, pkt->resource_id);
+ if (core_mask == 0) {
+ RTE_LOG(ERR, CHANNEL_MONITOR, "Error get physical CPU mask for "
+ "channel '%s' using vCPU(%u)\n", chan_info->channel_path,
+ (unsigned)pkt->unit);
+ return -1;
+ }
+ if (__builtin_popcountll(core_mask) == 1) {
+
+ unsigned core_num = __builtin_ffsll(core_mask) - 1;
+
+ switch (pkt->unit) {
+ case(CPU_POWER_SCALE_MIN):
+ power_manager_scale_core_min(core_num);
+ break;
+ case(CPU_POWER_SCALE_MAX):
+ power_manager_scale_core_max(core_num);
+ break;
+ case(CPU_POWER_SCALE_DOWN):
+ power_manager_scale_core_down(core_num);
+ break;
+ case(CPU_POWER_SCALE_UP):
+ power_manager_scale_core_up(core_num);
+ break;
+ default:
+ break;
+ }
+ } else {
+ switch (pkt->unit) {
+ case(CPU_POWER_SCALE_MIN):
+ power_manager_scale_mask_min(core_mask);
+ break;
+ case(CPU_POWER_SCALE_MAX):
+ power_manager_scale_mask_max(core_mask);
+ break;
+ case(CPU_POWER_SCALE_DOWN):
+ power_manager_scale_mask_down(core_mask);
+ break;
+ case(CPU_POWER_SCALE_UP):
+ power_manager_scale_mask_up(core_mask);
+ break;
+ default:
+ break;
+ }
+
+ }
+ }
+ /* Return is not checked as channel status may have been set to DISABLED
+ * from management thread
+ */
+ rte_atomic32_cmpset(&(chan_info->status), CHANNEL_MGR_CHANNEL_PROCESSING,
+ CHANNEL_MGR_CHANNEL_CONNECTED);
+ return 0;
+
+}
+
+int
+add_channel_to_monitor(struct channel_info **chan_info)
+{
+ struct channel_info *info = *chan_info;
+ struct epoll_event event;
+
+ event.events = EPOLLIN;
+ event.data.ptr = info;
+ if (epoll_ctl(global_event_fd, EPOLL_CTL_ADD, info->fd, &event) < 0) {
+ RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to add channel '%s' "
+ "to epoll\n", info->channel_path);
+ return -1;
+ }
+ return 0;
+}
+
+int
+remove_channel_from_monitor(struct channel_info *chan_info)
+{
+ if (epoll_ctl(global_event_fd, EPOLL_CTL_DEL, chan_info->fd, NULL) < 0) {
+ RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to remove channel '%s' "
+ "from epoll\n", chan_info->channel_path);
+ return -1;
+ }
+ return 0;
+}
+
+int
+channel_monitor_init(void)
+{
+ global_event_fd = epoll_create1(0);
+ if (global_event_fd < 0) {
+ RTE_LOG(ERR, CHANNEL_MONITOR, "Error creating epoll context with "
+ "error %s\n", strerror(errno));
+ return -1;
+ }
+ global_events_list = rte_malloc("epoll_events", sizeof(*global_events_list)
+ * MAX_EVENTS, RTE_CACHE_LINE_SIZE);
+ if (global_events_list == NULL) {
+ RTE_LOG(ERR, CHANNEL_MONITOR, "Unable to rte_malloc for "
+ "epoll events\n");
+ return -1;
+ }
+ return 0;
+}
+
+void
+run_channel_monitor(void)
+{
+ while (run_loop) {
+ int n_events, i;
+
+ n_events = epoll_wait(global_event_fd, global_events_list,
+ MAX_EVENTS, 1);
+ if (!run_loop)
+ break;
+ for (i = 0; i < n_events; i++) {
+ struct channel_info *chan_info = (struct channel_info *)
+ global_events_list[i].data.ptr;
+ if ((global_events_list[i].events & EPOLLERR) ||
+ (global_events_list[i].events & EPOLLHUP)) {
+ RTE_LOG(DEBUG, CHANNEL_MONITOR, "Remote closed connection for "
+ "channel '%s'\n", chan_info->channel_path);
+ remove_channel(&chan_info);
+ continue;
+ }
+ if (global_events_list[i].events & EPOLLIN) {
+
+ int n_bytes, err = 0;
+ struct channel_packet pkt;
+ void *buffer = &pkt;
+ int buffer_len = sizeof(pkt);
+
+ while (buffer_len > 0) {
+ n_bytes = read(chan_info->fd, buffer, buffer_len);
+ if (n_bytes == buffer_len)
+ break;
+ if (n_bytes == -1) {
+ err = errno;
+ RTE_LOG(DEBUG, CHANNEL_MONITOR, "Received error on "
+ "channel '%s' read: %s\n",
+ chan_info->channel_path, strerror(err));
+ remove_channel(&chan_info);
+ break;
+ }
+ buffer = (char *)buffer + n_bytes;
+ buffer_len -= n_bytes;
+ }
+ if (!err)
+ process_request(&pkt, chan_info);
+ }
+ }
+ }
+}
diff --git a/examples/vm_power_manager/channel_monitor.h b/examples/vm_power_manager/channel_monitor.h
new file mode 100644
index 00000000..c1386079
--- /dev/null
+++ b/examples/vm_power_manager/channel_monitor.h
@@ -0,0 +1,102 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CHANNEL_MONITOR_H_
+#define CHANNEL_MONITOR_H_
+
+#include "channel_manager.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * Set up the Channel Monitor resources and initialize the epoll instance.
+ * Must be called before any other function in this header.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int channel_monitor_init(void);
+
+/**
+ * Run the channel monitor; loops forever on epoll_wait.
+ *
+ *
+ * @return
+ * None
+ */
+void run_channel_monitor(void);
+
+/**
+ * Exit the Channel Monitor, ending the epoll_wait loop and event processing.
+ *
+ * @return
+ * None
+ */
+void channel_monitor_exit(void);
+
+/**
+ * Add an open channel to monitor via epoll. A pointer to struct channel_info
+ * will be registered with epoll for event processing.
+ * It is thread-safe.
+ *
+ * @param chan_info
+ * Pointer to struct channel_info pointer.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int add_channel_to_monitor(struct channel_info **chan_info);
+
+/**
+ * Remove a previously added channel from epoll control.
+ *
+ * @param chan_info
+ * Pointer to struct channel_info.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int remove_channel_from_monitor(struct channel_info *chan_info);
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* CHANNEL_MONITOR_H_ */
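
As the example's main.c (later in this patch) shows, the monitor is intended to run on its own lcore. A minimal sketch of that wiring, editor-added and not part of the patch, with error handling abbreviated:

    #include <rte_lcore.h>
    #include <rte_launch.h>
    #include "channel_monitor.h"

    /* Body to launch on a worker lcore with rte_eal_remote_launch(). */
    static int monitor_lcore(__attribute__((unused)) void *arg)
    {
        if (channel_monitor_init() < 0)
            return -1;
        /* Blocks in epoll_wait until channel_monitor_exit() is called. */
        run_channel_monitor();
        return 0;
    }

Channels opened by the channel manager are then handed to the monitor with add_channel_to_monitor(&chan_info).
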
diff --git a/examples/vm_power_manager/guest_cli/Makefile b/examples/vm_power_manager/guest_cli/Makefile
new file mode 100644
index 00000000..55072708
--- /dev/null
+++ b/examples/vm_power_manager/guest_cli/Makefile
@@ -0,0 +1,56 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = guest_vm_power_mgr
+
+# all source are stored in SRCS-y
+SRCS-y := main.c vm_power_cli_guest.c
+
+CFLAGS += -O3 -I$(RTE_SDK)/lib/librte_power/
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vm_power_manager/guest_cli/main.c b/examples/vm_power_manager/guest_cli/main.c
new file mode 100644
index 00000000..5ac98ed3
--- /dev/null
+++ b/examples/vm_power_manager/guest_cli/main.c
@@ -0,0 +1,86 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <signal.h>
+
+#include <rte_lcore.h>
+#include <rte_power.h>
+#include <rte_debug.h>
+
+#include "vm_power_cli_guest.h"
+
+static void
+sig_handler(int signo)
+{
+ printf("Received signal %d, exiting...\n", signo);
+ unsigned lcore_id;
+
+ RTE_LCORE_FOREACH(lcore_id) {
+ rte_power_exit(lcore_id);
+ }
+
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned lcore_id;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ rte_power_set_env(PM_ENV_KVM_VM);
+ RTE_LCORE_FOREACH(lcore_id) {
+ rte_power_init(lcore_id);
+ }
+ run_cli(NULL);
+
+ return 0;
+}
diff --git a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
new file mode 100644
index 00000000..7931135e
--- /dev/null
+++ b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.c
@@ -0,0 +1,155 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <termios.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+#include <rte_log.h>
+#include <rte_lcore.h>
+
+#include <rte_power.h>
+
+#include "vm_power_cli_guest.h"
+
+
+#define CHANNEL_PATH "/dev/virtio-ports/virtio.serial.port.poweragent"
+
+
+#define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ __attribute__((unused)) struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ unsigned lcore_id;
+
+ RTE_LCORE_FOREACH(lcore_id) {
+ rte_power_exit(lcore_id);
+ }
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "close the application",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/* *** VM operations *** */
+
+struct cmd_set_cpu_freq_result {
+ cmdline_fixed_string_t set_cpu_freq;
+ uint8_t lcore_id;
+ cmdline_fixed_string_t cmd;
+};
+
+static void
+cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ int ret = -1;
+ struct cmd_set_cpu_freq_result *res = parsed_result;
+
+ if (!strcmp(res->cmd, "up"))
+ ret = rte_power_freq_up(res->lcore_id);
+ else if (!strcmp(res->cmd, "down"))
+ ret = rte_power_freq_down(res->lcore_id);
+ else if (!strcmp(res->cmd, "min"))
+ ret = rte_power_freq_min(res->lcore_id);
+ else if (!strcmp(res->cmd, "max"))
+ ret = rte_power_freq_max(res->lcore_id);
+ if (ret != 1)
+ cmdline_printf(cl, "Error sending frequency scale request for lcore %u\n",
+ (unsigned)res->lcore_id);
+}
+
+cmdline_parse_token_string_t cmd_set_cpu_freq =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result,
+ set_cpu_freq, "set_cpu_freq");
+cmdline_parse_token_num_t cmd_set_cpu_freq_core_num =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result,
+ lcore_id, UINT8);
+cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result,
+ cmd, "up#down#min#max");
+
+cmdline_parse_inst_t cmd_set_cpu_freq_set = {
+ .f = cmd_set_cpu_freq_parsed,
+ .data = NULL,
+ .help_str = "set_cpu_freq <core_num> <up|down|min|max>, Set the current "
+ "frequency for the specified core by scaling up/down/min/max",
+ .tokens = {
+ (void *)&cmd_set_cpu_freq,
+ (void *)&cmd_set_cpu_freq_core_num,
+ (void *)&cmd_set_cpu_freq_cmd_cmd,
+ NULL,
+ },
+};
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set,
+ NULL,
+};
+
+void
+run_cli(__attribute__((unused)) void *arg)
+{
+ struct cmdline *cl;
+
+ cl = cmdline_stdin_new(main_ctx, "vmpower(guest)> ");
+ if (cl == NULL)
+ return;
+
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+}
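
For reference, an interactive session with this guest CLI might look as follows (editor-added illustration; the lcore numbers are arbitrary):

    vmpower(guest)> set_cpu_freq 0 up
    vmpower(guest)> set_cpu_freq 1 min
    vmpower(guest)> quit
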
diff --git a/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h
new file mode 100644
index 00000000..0c4bdd5b
--- /dev/null
+++ b/examples/vm_power_manager/guest_cli/vm_power_cli_guest.h
@@ -0,0 +1,55 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VM_POWER_CLI_H_
+#define VM_POWER_CLI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "channel_commands.h"
+
+int guest_channel_host_connect(unsigned lcore_id);
+
+int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id);
+
+void guest_channel_host_disconnect(unsigned lcore_id);
+
+void run_cli(__attribute__((unused)) void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* VM_POWER_CLI_H_ */
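
Besides the interactive CLI, the guest_channel_* functions declared above let a guest program send a scaling request directly. The sketch below is editor-added and assumes the struct channel_packet fields (command, resource_id, unit) and the CPU_POWER / CPU_POWER_SCALE_UP constants defined in channel_commands.h, which is outside this excerpt; the return-value checks are assumptions as well.

    #include <string.h>
    #include "vm_power_cli_guest.h"

    /* Ask the host to scale up the pCPU backing guest lcore 0. */
    static int request_scale_up(void)
    {
        struct channel_packet pkt;
        unsigned lcore_id = 0;

        if (guest_channel_host_connect(lcore_id) < 0)
            return -1;

        memset(&pkt, 0, sizeof(pkt));
        pkt.command = CPU_POWER;          /* assumed field/constant names */
        pkt.resource_id = lcore_id;
        pkt.unit = CPU_POWER_SCALE_UP;

        /* Assumes a negative return on failure. */
        if (guest_channel_send_msg(&pkt, lcore_id) < 0)
            return -1;

        guest_channel_host_disconnect(lcore_id);
        return 0;
    }
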
diff --git a/examples/vm_power_manager/main.c b/examples/vm_power_manager/main.c
new file mode 100644
index 00000000..97178d14
--- /dev/null
+++ b/examples/vm_power_manager/main.c
@@ -0,0 +1,115 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <stdint.h>
+#include <errno.h>
+#include <sys/epoll.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include <sys/queue.h>
+
+#include <rte_common.h>
+#include <rte_eal.h>
+#include <rte_launch.h>
+#include <rte_log.h>
+#include <rte_per_lcore.h>
+#include <rte_lcore.h>
+#include <rte_debug.h>
+
+#include "channel_manager.h"
+#include "channel_monitor.h"
+#include "power_manager.h"
+#include "vm_power_cli.h"
+
+static int
+run_monitor(__attribute__((unused)) void *arg)
+{
+ if (channel_monitor_init() < 0) {
+ printf("Unable to initialize channel monitor\n");
+ return -1;
+ }
+ run_channel_monitor();
+ return 0;
+}
+
+static void
+sig_handler(int signo)
+{
+ printf("Received signal %d, exiting...\n", signo);
+ channel_monitor_exit();
+ channel_manager_exit();
+ power_manager_exit();
+
+}
+
+int
+main(int argc, char **argv)
+{
+ int ret;
+ unsigned lcore_id;
+
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_panic("Cannot init EAL\n");
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ lcore_id = rte_get_next_lcore(-1, 1, 0);
+ if (lcore_id == RTE_MAX_LCORE) {
+ RTE_LOG(ERR, EAL, "A minimum of two cores are required to run "
+ "application\n");
+ return 0;
+ }
+ rte_eal_remote_launch(run_monitor, NULL, lcore_id);
+
+ if (power_manager_init() < 0) {
+ printf("Unable to initialize power manager\n");
+ return -1;
+ }
+ if (channel_manager_init(CHANNEL_MGR_DEFAULT_HV_PATH) < 0) {
+ printf("Unable to initialize channel manager\n");
+ return -1;
+ }
+ run_cli(NULL);
+
+ rte_eal_mp_wait_lcore();
+ return 0;
+}
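
The host application therefore needs at least two lcores: one for the monitor launched above and one for the CLI on the master lcore. Assuming the example builds to a binary named vm_power_mgr (its Makefile is outside this excerpt), a typical invocation with an arbitrary core mask would be:

    ./build/vm_power_mgr -c 0x3 -n 4
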
diff --git a/examples/vm_power_manager/power_manager.c b/examples/vm_power_manager/power_manager.c
new file mode 100644
index 00000000..2644fce6
--- /dev/null
+++ b/examples/vm_power_manager/power_manager.c
@@ -0,0 +1,252 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <sys/un.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <errno.h>
+
+#include <sys/types.h>
+
+#include <rte_log.h>
+#include <rte_power.h>
+#include <rte_spinlock.h>
+
+#include "power_manager.h"
+
+#define RTE_LOGTYPE_POWER_MANAGER RTE_LOGTYPE_USER1
+
+#define POWER_SCALE_CORE(DIRECTION, core_num , ret) do { \
+ if (core_num >= POWER_MGR_MAX_CPUS) \
+ return -1; \
+ if (!(global_enabled_cpus & (1ULL << core_num))) \
+ return -1; \
+ rte_spinlock_lock(&global_core_freq_info[core_num].power_sl); \
+ ret = rte_power_freq_##DIRECTION(core_num); \
+ rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl); \
+} while (0)
+
+#define POWER_SCALE_MASK(DIRECTION, core_mask, ret) do { \
+ int i; \
+ for (i = 0; core_mask; core_mask &= ~(1ULL << i++)) { \
+ if ((core_mask >> i) & 1) { \
+ if (!(global_enabled_cpus & (1ULL << i))) \
+ continue; \
+ rte_spinlock_lock(&global_core_freq_info[i].power_sl); \
+ if (rte_power_freq_##DIRECTION(i) != 1) \
+ ret = -1; \
+ rte_spinlock_unlock(&global_core_freq_info[i].power_sl); \
+ } \
+ } \
+} while (0)
+
+struct freq_info {
+ rte_spinlock_t power_sl;
+ uint32_t freqs[RTE_MAX_LCORE_FREQS];
+ unsigned num_freqs;
+} __rte_cache_aligned;
+
+static struct freq_info global_core_freq_info[POWER_MGR_MAX_CPUS];
+
+static uint64_t global_enabled_cpus;
+
+#define SYSFS_CPU_PATH "/sys/devices/system/cpu/cpu%u/topology/core_id"
+
+static unsigned
+set_host_cpus_mask(void)
+{
+ char path[PATH_MAX];
+ unsigned i;
+ unsigned num_cpus = 0;
+
+ for (i = 0; i < POWER_MGR_MAX_CPUS; i++) {
+ snprintf(path, sizeof(path), SYSFS_CPU_PATH, i);
+ if (access(path, F_OK) == 0) {
+ global_enabled_cpus |= 1ULL << i;
+ num_cpus++;
+ } else
+ return num_cpus;
+ }
+ return num_cpus;
+}
+
+int
+power_manager_init(void)
+{
+ unsigned i, num_cpus;
+ uint64_t cpu_mask;
+ int ret = 0;
+
+ num_cpus = set_host_cpus_mask();
+ if (num_cpus == 0) {
+ RTE_LOG(ERR, POWER_MANAGER, "Unable to detected host CPUs, please "
+ "ensure that sufficient privileges exist to inspect sysfs\n");
+ return -1;
+ }
+ rte_power_set_env(PM_ENV_ACPI_CPUFREQ);
+ cpu_mask = global_enabled_cpus;
+ for (i = 0; cpu_mask; cpu_mask &= ~(1ULL << i++)) {
+ if (rte_power_init(i) < 0 || rte_power_freqs(i,
+ global_core_freq_info[i].freqs,
+ RTE_MAX_LCORE_FREQS) == 0) {
+ RTE_LOG(ERR, POWER_MANAGER, "Unable to initialize power manager "
+ "for core %u\n", i);
+ global_enabled_cpus &= ~(1ULL << i);
+ num_cpus--;
+ ret = -1;
+ }
+ rte_spinlock_init(&global_core_freq_info[i].power_sl);
+ }
+ RTE_LOG(INFO, POWER_MANAGER, "Detected %u host CPUs, enabled core mask:"
+ " 0x%"PRIx64"\n", num_cpus, global_enabled_cpus);
+ return ret;
+
+}
+
+uint32_t
+power_manager_get_current_frequency(unsigned core_num)
+{
+ uint32_t freq, index;
+
+ if (core_num >= POWER_MGR_MAX_CPUS) {
+ RTE_LOG(ERR, POWER_MANAGER, "Core(%u) is out of range 0...%d\n",
+ core_num, POWER_MGR_MAX_CPUS-1);
+ return 0;
+ }
+ if (!(global_enabled_cpus & (1ULL << core_num)))
+ return 0;
+
+ rte_spinlock_lock(&global_core_freq_info[core_num].power_sl);
+ index = rte_power_get_freq(core_num);
+ rte_spinlock_unlock(&global_core_freq_info[core_num].power_sl);
+ if (index >= RTE_MAX_LCORE_FREQS)
+ freq = 0;
+ else
+ freq = global_core_freq_info[core_num].freqs[index];
+
+ return freq;
+}
+
+int
+power_manager_exit(void)
+{
+ unsigned int i;
+ int ret = 0;
+
+ for (i = 0; global_enabled_cpus; global_enabled_cpus &= ~(1ULL << i++)) {
+ if (rte_power_exit(i) < 0) {
+ RTE_LOG(ERR, POWER_MANAGER, "Unable to shutdown power manager "
+ "for core %u\n", i);
+ ret = -1;
+ }
+ }
+ global_enabled_cpus = 0;
+ return ret;
+}
+
+int
+power_manager_scale_mask_up(uint64_t core_mask)
+{
+ int ret = 0;
+
+ POWER_SCALE_MASK(up, core_mask, ret);
+ return ret;
+}
+
+int
+power_manager_scale_mask_down(uint64_t core_mask)
+{
+ int ret = 0;
+
+ POWER_SCALE_MASK(down, core_mask, ret);
+ return ret;
+}
+
+int
+power_manager_scale_mask_min(uint64_t core_mask)
+{
+ int ret = 0;
+
+ POWER_SCALE_MASK(min, core_mask, ret);
+ return ret;
+}
+
+int
+power_manager_scale_mask_max(uint64_t core_mask)
+{
+ int ret = 0;
+
+ POWER_SCALE_MASK(max, core_mask, ret);
+ return ret;
+}
+
+int
+power_manager_scale_core_up(unsigned core_num)
+{
+ int ret = 0;
+
+ POWER_SCALE_CORE(up, core_num, ret);
+ return ret;
+}
+
+int
+power_manager_scale_core_down(unsigned core_num)
+{
+ int ret = 0;
+
+ POWER_SCALE_CORE(down, core_num, ret);
+ return ret;
+}
+
+int
+power_manager_scale_core_min(unsigned core_num)
+{
+ int ret = 0;
+
+ POWER_SCALE_CORE(min, core_num, ret);
+ return ret;
+}
+
+int
+power_manager_scale_core_max(unsigned core_num)
+{
+ int ret = 0;
+
+ POWER_SCALE_CORE(max, core_num, ret);
+ return ret;
+}
diff --git a/examples/vm_power_manager/power_manager.h b/examples/vm_power_manager/power_manager.h
new file mode 100644
index 00000000..1b45babf
--- /dev/null
+++ b/examples/vm_power_manager/power_manager.h
@@ -0,0 +1,188 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef POWER_MANAGER_H_
+#define POWER_MANAGER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Maximum number of CPUS to manage */
+#define POWER_MGR_MAX_CPUS 64
+/**
+ * Initialize power management.
+ * Initializes resources and verifies the number of CPUs on the system.
+ * Wraps librte_power int rte_power_init(unsigned lcore_id);
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_manager_init(void);
+
+/**
+ * Exit power management. Must be called prior to exiting the application.
+ *
+ * @return
+ * - 0 on success.
+ * - Negative on error.
+ */
+int power_manager_exit(void);
+
+/**
+ * Scale up the frequency of the cores specified in core_mask.
+ * It is thread-safe.
+ *
+ * @param core_mask
+ * The uint64_t bit-mask of cores to change frequency.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int power_manager_scale_mask_up(uint64_t core_mask);
+
+/**
+ * Scale down the frequency of the cores specified in core_mask.
+ * It is thread-safe.
+ *
+ * @param core_mask
+ * The uint64_t bit-mask of cores to change frequency.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int power_manager_scale_mask_down(uint64_t core_mask);
+
+/**
+ * Scale to the minimum frequency of the cores specified in core_mask.
+ * It is thread-safe.
+ *
+ * @param core_mask
+ * The uint64_t bit-mask of cores to change frequency.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int power_manager_scale_mask_min(uint64_t core_mask);
+
+/**
+ * Scale to the maximum frequency of the cores specified in core_mask.
+ * It is thread-safe.
+ *
+ * @param core_mask
+ * The uint64_t bit-mask of cores to change frequency.
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int power_manager_scale_mask_max(uint64_t core_mask);
+
+/**
+ * Scale up frequency for the core specified by core_num.
+ * It is thread-safe.
+ *
+ * @param core_num
+ * The core number to change frequency
+ *
+ * @return
+ * - 1 on success.
+ * - Negative on error.
+ */
+int power_manager_scale_core_up(unsigned core_num);
+
+/**
+ * Scale down frequency for the core specified by core_num.
+ * It is thread-safe.
+ *
+ * @param core_num
+ * The core number to change frequency
+ *
+ * @return
+ * - 1 on success.
+ * - 0 if frequency not changed.
+ * - Negative on error.
+ */
+int power_manager_scale_core_down(unsigned core_num);
+
+/**
+ * Scale to minimum frequency for the core specified by core_num.
+ * It is thread-safe.
+ *
+ * @param core_num
+ * The core number to change frequency
+ *
+ * @return
+ * - 1 on success.
+ * - 0 if frequency not changed.
+ * - Negative on error.
+ */
+int power_manager_scale_core_min(unsigned core_num);
+
+/**
+ * Scale to maximum frequency for the core specified by core_num.
+ * It is thread-safe.
+ *
+ * @param core_num
+ * The core number to change frequency
+ *
+ * @return
+ * - 1 on success.
+ * - 0 if frequency not changed.
+ * - Negative on error.
+ */
+int power_manager_scale_core_max(unsigned core_num);
+
+/**
+ * Get the current frequency of the core specified by core_num.
+ *
+ * @param core_num
+ * The core number to get the current frequency
+ *
+ * @return
+ * - 0 on error
+ * - >0 for current frequency.
+ */
+uint32_t power_manager_get_current_frequency(unsigned core_num);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+
+#endif /* POWER_MANAGER_H_ */
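
Taken together, the calls above follow an init, scale, query, exit pattern. A brief editor-added sketch, with arbitrary core numbers and masks:

    #include <stdio.h>
    #include <stdint.h>
    #include "power_manager.h"

    static void example_power_cycle(void)
    {
        if (power_manager_init() < 0)
            return;

        /* Scale cores 0-3 to their maximum frequency (mask 0xF). */
        if (power_manager_scale_mask_max(0xF) < 0)
            printf("Not all cores in the mask could be scaled\n");

        /* Step a single core down, then report its current frequency. */
        power_manager_scale_core_down(2);
        printf("Core 2 frequency: %u\n",
                power_manager_get_current_frequency(2));

        power_manager_exit();
    }
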
diff --git a/examples/vm_power_manager/vm_power_cli.c b/examples/vm_power_manager/vm_power_cli.c
new file mode 100644
index 00000000..c5e8d934
--- /dev/null
+++ b/examples/vm_power_manager/vm_power_cli.c
@@ -0,0 +1,672 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdlib.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <termios.h>
+#include <errno.h>
+
+#include <cmdline_rdline.h>
+#include <cmdline_parse.h>
+#include <cmdline_parse_string.h>
+#include <cmdline_parse_num.h>
+#include <cmdline_socket.h>
+#include <cmdline.h>
+
+#include "vm_power_cli.h"
+#include "channel_manager.h"
+#include "channel_monitor.h"
+#include "power_manager.h"
+#include "channel_commands.h"
+
+struct cmd_quit_result {
+ cmdline_fixed_string_t quit;
+};
+
+static void cmd_quit_parsed(__attribute__((unused)) void *parsed_result,
+ struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ channel_monitor_exit();
+ channel_manager_exit();
+ power_manager_exit();
+ cmdline_quit(cl);
+}
+
+cmdline_parse_token_string_t cmd_quit_quit =
+ TOKEN_STRING_INITIALIZER(struct cmd_quit_result, quit, "quit");
+
+cmdline_parse_inst_t cmd_quit = {
+ .f = cmd_quit_parsed, /* function to call */
+ .data = NULL, /* 2nd arg of func */
+ .help_str = "close the application",
+ .tokens = { /* token list, NULL terminated */
+ (void *)&cmd_quit_quit,
+ NULL,
+ },
+};
+
+/* *** VM operations *** */
+struct cmd_show_vm_result {
+ cmdline_fixed_string_t show_vm;
+ cmdline_fixed_string_t vm_name;
+};
+
+static void
+cmd_show_vm_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_show_vm_result *res = parsed_result;
+ struct vm_info info;
+ unsigned i;
+
+ if (get_info_vm(res->vm_name, &info) != 0)
+ return;
+ cmdline_printf(cl, "VM: '%s', status = ", info.name);
+ if (info.status == CHANNEL_MGR_VM_ACTIVE)
+ cmdline_printf(cl, "ACTIVE\n");
+ else
+ cmdline_printf(cl, "INACTIVE\n");
+ cmdline_printf(cl, "Channels %u\n", info.num_channels);
+ for (i = 0; i < info.num_channels; i++) {
+ cmdline_printf(cl, " [%u]: %s, status = ", i,
+ info.channels[i].channel_path);
+ switch (info.channels[i].status) {
+ case CHANNEL_MGR_CHANNEL_CONNECTED:
+ cmdline_printf(cl, "CONNECTED\n");
+ break;
+ case CHANNEL_MGR_CHANNEL_DISCONNECTED:
+ cmdline_printf(cl, "DISCONNECTED\n");
+ break;
+ case CHANNEL_MGR_CHANNEL_DISABLED:
+ cmdline_printf(cl, "DISABLED\n");
+ break;
+ case CHANNEL_MGR_CHANNEL_PROCESSING:
+ cmdline_printf(cl, "PROCESSING\n");
+ break;
+ default:
+ cmdline_printf(cl, "UNKNOWN\n");
+ break;
+ }
+ }
+ cmdline_printf(cl, "Virtual CPU(s): %u\n", info.num_vcpus);
+ for (i = 0; i < info.num_vcpus; i++) {
+ cmdline_printf(cl, " [%u]: Physical CPU Mask 0x%"PRIx64"\n", i,
+ info.pcpu_mask[i]);
+ }
+}
+
+
+
+cmdline_parse_token_string_t cmd_vm_show =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result,
+ show_vm, "show_vm");
+cmdline_parse_token_string_t cmd_show_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_vm_result,
+ vm_name, NULL);
+
+cmdline_parse_inst_t cmd_show_vm_set = {
+ .f = cmd_show_vm_parsed,
+ .data = NULL,
+ .help_str = "show_vm <vm_name>, prints the information on the "
+ "specified VM(s), the information lists the number of vCPUS, the "
+ "pinning to pCPU(s) as a bit mask, along with any communication "
+ "channels associated with each VM",
+ .tokens = {
+ (void *)&cmd_vm_show,
+ (void *)&cmd_show_vm_name,
+ NULL,
+ },
+};
+
+/* *** vCPU to pCPU mapping operations *** */
+struct cmd_set_pcpu_mask_result {
+ cmdline_fixed_string_t set_pcpu_mask;
+ cmdline_fixed_string_t vm_name;
+ uint8_t vcpu;
+ uint64_t core_mask;
+};
+
+static void
+cmd_set_pcpu_mask_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_set_pcpu_mask_result *res = parsed_result;
+
+ if (set_pcpus_mask(res->vm_name, res->vcpu, res->core_mask) == 0)
+ cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core "
+ "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask);
+ else
+ cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core "
+ "mask(0x%"PRIx64")\n", res->vcpu, res->core_mask);
+}
+
+cmdline_parse_token_string_t cmd_set_pcpu_mask =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result,
+ set_pcpu_mask, "set_pcpu_mask");
+cmdline_parse_token_string_t cmd_set_pcpu_mask_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_mask_result,
+ vm_name, NULL);
+cmdline_parse_token_num_t set_pcpu_mask_vcpu =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result,
+ vcpu, UINT8);
+cmdline_parse_token_num_t set_pcpu_mask_core_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_mask_result,
+ core_mask, UINT64);
+
+
+cmdline_parse_inst_t cmd_set_pcpu_mask_set = {
+ .f = cmd_set_pcpu_mask_parsed,
+ .data = NULL,
+ .help_str = "set_pcpu_mask <vm_name> <vcpu> <pcpu>, Set the binding "
+ "of Virtual CPU on VM to the Physical CPU mask.",
+ .tokens = {
+ (void *)&cmd_set_pcpu_mask,
+ (void *)&cmd_set_pcpu_mask_vm_name,
+ (void *)&set_pcpu_mask_vcpu,
+ (void *)&set_pcpu_mask_core_mask,
+ NULL,
+ },
+};
+
+struct cmd_set_pcpu_result {
+ cmdline_fixed_string_t set_pcpu;
+ cmdline_fixed_string_t vm_name;
+ uint8_t vcpu;
+ uint8_t core;
+};
+
+static void
+cmd_set_pcpu_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_set_pcpu_result *res = parsed_result;
+
+ if (set_pcpu(res->vm_name, res->vcpu, res->core) == 0)
+ cmdline_printf(cl, "Pinned vCPU(%"PRId8") to pCPU core "
+ "%"PRId8")\n", res->vcpu, res->core);
+ else
+ cmdline_printf(cl, "Unable to pin vCPU(%"PRId8") to pCPU core "
+ "%"PRId8")\n", res->vcpu, res->core);
+}
+
+cmdline_parse_token_string_t cmd_set_pcpu =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result,
+ set_pcpu, "set_pcpu");
+cmdline_parse_token_string_t cmd_set_pcpu_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_pcpu_result,
+ vm_name, NULL);
+cmdline_parse_token_num_t set_pcpu_vcpu =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result,
+ vcpu, UINT8);
+cmdline_parse_token_num_t set_pcpu_core =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_pcpu_result,
+ core, UINT8);
+
+
+cmdline_parse_inst_t cmd_set_pcpu_set = {
+ .f = cmd_set_pcpu_parsed,
+ .data = NULL,
+ .help_str = "set_pcpu <vm_name> <vcpu> <pcpu>, Set the binding "
+ "of Virtual CPU on VM to the Physical CPU.",
+ .tokens = {
+ (void *)&cmd_set_pcpu,
+ (void *)&cmd_set_pcpu_vm_name,
+ (void *)&set_pcpu_vcpu,
+ (void *)&set_pcpu_core,
+ NULL,
+ },
+};
+
+struct cmd_vm_op_result {
+ cmdline_fixed_string_t op_vm;
+ cmdline_fixed_string_t vm_name;
+};
+
+static void
+cmd_vm_op_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_vm_op_result *res = parsed_result;
+
+ if (!strcmp(res->op_vm, "add_vm")) {
+ if (add_vm(res->vm_name) < 0)
+ cmdline_printf(cl, "Unable to add VM '%s'\n", res->vm_name);
+ } else if (remove_vm(res->vm_name) < 0)
+ cmdline_printf(cl, "Unable to remove VM '%s'\n", res->vm_name);
+}
+
+cmdline_parse_token_string_t cmd_vm_op =
+ TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result,
+ op_vm, "add_vm#rm_vm");
+cmdline_parse_token_string_t cmd_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_vm_op_result,
+ vm_name, NULL);
+
+cmdline_parse_inst_t cmd_vm_op_set = {
+ .f = cmd_vm_op_parsed,
+ .data = NULL,
+ .help_str = "add_vm|rm_vm <name>, add a VM for "
+ "subsequent operations with the CLI or remove a previously added "
+ "VM from the VM Power Manager",
+ .tokens = {
+ (void *)&cmd_vm_op,
+ (void *)&cmd_vm_name,
+ NULL,
+ },
+};
+
+/* *** VM channel operations *** */
+struct cmd_channels_op_result {
+ cmdline_fixed_string_t op;
+ cmdline_fixed_string_t vm_name;
+ cmdline_fixed_string_t channel_list;
+};
+static void
+cmd_channels_op_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ unsigned num_channels = 0, channel_num, i;
+ int channels_added;
+ unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS];
+ char *token, *remaining, *tail_ptr;
+ struct cmd_channels_op_result *res = parsed_result;
+
+ if (!strcmp(res->channel_list, "all")) {
+ channels_added = add_all_channels(res->vm_name);
+ cmdline_printf(cl, "Added %d channels for VM '%s'\n",
+ channels_added, res->vm_name);
+ return;
+ }
+
+ remaining = res->channel_list;
+ while (1) {
+ if (remaining == NULL || remaining[0] == '\0')
+ break;
+
+ token = strsep(&remaining, ",");
+ if (token == NULL)
+ break;
+ errno = 0;
+ channel_num = (unsigned)strtol(token, &tail_ptr, 10);
+ if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0'))
+ break;
+
+ if (channel_num >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+ cmdline_printf(cl, "Channel number '%u' exceeds the maximum number "
+ "of allowable channels(%u) for VM '%s'\n", channel_num,
+ CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name);
+ return;
+ }
+ channel_list[num_channels++] = channel_num;
+ }
+ for (i = 0; i < num_channels; i++)
+ cmdline_printf(cl, "[%u]: Adding channel %u\n", i, channel_list[i]);
+
+ channels_added = add_channels(res->vm_name, channel_list,
+ num_channels);
+ cmdline_printf(cl, "Enabled %d channels for '%s'\n", channels_added,
+ res->vm_name);
+}
+
+cmdline_parse_token_string_t cmd_channels_op =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result,
+ op, "add_channels");
+cmdline_parse_token_string_t cmd_channels_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result,
+ vm_name, NULL);
+cmdline_parse_token_string_t cmd_channels_list =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_op_result,
+ channel_list, NULL);
+
+cmdline_parse_inst_t cmd_channels_op_set = {
+ .f = cmd_channels_op_parsed,
+ .data = NULL,
+ .help_str = "add_channels <vm_name> <list>|all, add "
+ "communication channels for the specified VM, the "
+ "virtio channels must be enabled in the VM "
+ "configuration(qemu/libvirt) and the associated VM must be active. "
+ "<list> is a comma-separated list of channel numbers to add, using "
+ "the keyword 'all' will attempt to add all channels for the VM",
+ .tokens = {
+ (void *)&cmd_channels_op,
+ (void *)&cmd_channels_vm_name,
+ (void *)&cmd_channels_list,
+ NULL,
+ },
+};
+
+struct cmd_channels_status_op_result {
+ cmdline_fixed_string_t op;
+ cmdline_fixed_string_t vm_name;
+ cmdline_fixed_string_t channel_list;
+ cmdline_fixed_string_t status;
+};
+
+static void
+cmd_channels_status_op_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ unsigned num_channels = 0, channel_num;
+ int changed;
+ unsigned channel_list[CHANNEL_CMDS_MAX_VM_CHANNELS];
+ char *token, *remaining, *tail_ptr;
+ struct cmd_channels_status_op_result *res = parsed_result;
+ enum channel_status status;
+
+ if (!strcmp(res->status, "enabled"))
+ status = CHANNEL_MGR_CHANNEL_CONNECTED;
+ else
+ status = CHANNEL_MGR_CHANNEL_DISABLED;
+
+ if (!strcmp(res->channel_list, "all")) {
+ changed = set_channel_status_all(res->vm_name, status);
+ cmdline_printf(cl, "Updated status of %d channels "
+ "for VM '%s'\n", changed, res->vm_name);
+ return;
+ }
+ remaining = res->channel_list;
+ while (1) {
+ if (remaining == NULL || remaining[0] == '\0')
+ break;
+ token = strsep(&remaining, ",");
+ if (token == NULL)
+ break;
+ errno = 0;
+ channel_num = (unsigned)strtol(token, &tail_ptr, 10);
+ if ((errno != 0) || tail_ptr == NULL || (*tail_ptr != '\0'))
+ break;
+
+ if (channel_num >= CHANNEL_CMDS_MAX_VM_CHANNELS) {
+ cmdline_printf(cl, "%u exceeds the maximum number of allowable "
+ "channels(%u) for VM '%s'\n", channel_num,
+ CHANNEL_CMDS_MAX_VM_CHANNELS, res->vm_name);
+ return;
+ }
+ channel_list[num_channels++] = channel_num;
+ }
+ changed = set_channel_status(res->vm_name, channel_list, num_channels,
+ status);
+ cmdline_printf(cl, "Updated status of %d channels "
+ "for VM '%s'\n", changed, res->vm_name);
+}
+
+cmdline_parse_token_string_t cmd_channels_status_op =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result,
+ op, "set_channel_status");
+cmdline_parse_token_string_t cmd_channels_status_vm_name =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result,
+ vm_name, NULL);
+cmdline_parse_token_string_t cmd_channels_status_list =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result,
+ channel_list, NULL);
+cmdline_parse_token_string_t cmd_channels_status =
+ TOKEN_STRING_INITIALIZER(struct cmd_channels_status_op_result,
+ status, "enabled#disabled");
+
+cmdline_parse_inst_t cmd_channels_status_op_set = {
+ .f = cmd_channels_status_op_parsed,
+ .data = NULL,
+ .help_str = "set_channel_status <vm_name> <list>|all enabled|disabled, "
+			"enable or disable the communication channels in the "
+			"comma-separated list for the specified VM; alternatively "
+			"the list can be replaced with the keyword 'all'. "
+ "Disabled channels will still receive packets on the host, "
+ "however the commands they specify will be ignored. "
+ "Set status to 'enabled' to begin processing requests again.",
+ .tokens = {
+ (void *)&cmd_channels_status_op,
+ (void *)&cmd_channels_status_vm_name,
+ (void *)&cmd_channels_status_list,
+ (void *)&cmd_channels_status,
+ NULL,
+ },
+};
+
+/* *** CPU Frequency operations *** */
+struct cmd_show_cpu_freq_mask_result {
+ cmdline_fixed_string_t show_cpu_freq_mask;
+ uint64_t core_mask;
+};
+
+static void
+cmd_show_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_show_cpu_freq_mask_result *res = parsed_result;
+ unsigned i;
+ uint64_t mask = res->core_mask;
+ uint32_t freq;
+
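+	/*
+	 * Walk the mask bit by bit; each set bit selects a core to query.
+	 * For example, a core_mask of 0x5 reports the frequency of cores 0 and 2.
+	 */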
+ for (i = 0; mask; mask &= ~(1ULL << i++)) {
+ if ((mask >> i) & 1) {
+ freq = power_manager_get_current_frequency(i);
+ if (freq > 0)
+				cmdline_printf(cl, "Core %u: %"PRIu32"\n", i, freq);
+ }
+ }
+}
+
+cmdline_parse_token_string_t cmd_show_cpu_freq_mask =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_mask_result,
+ show_cpu_freq_mask, "show_cpu_freq_mask");
+cmdline_parse_token_num_t cmd_show_cpu_freq_mask_core_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_mask_result,
+ core_mask, UINT64);
+
+cmdline_parse_inst_t cmd_show_cpu_freq_mask_set = {
+ .f = cmd_show_cpu_freq_mask_parsed,
+ .data = NULL,
+ .help_str = "show_cpu_freq_mask <mask>, Get the current frequency for each "
+ "core specified in the mask",
+ .tokens = {
+ (void *)&cmd_show_cpu_freq_mask,
+ (void *)&cmd_show_cpu_freq_mask_core_mask,
+ NULL,
+ },
+};
+
+struct cmd_set_cpu_freq_mask_result {
+ cmdline_fixed_string_t set_cpu_freq_mask;
+ uint64_t core_mask;
+ cmdline_fixed_string_t cmd;
+};
+
+static void
+cmd_set_cpu_freq_mask_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_set_cpu_freq_mask_result *res = parsed_result;
+ int ret = -1;
+
+	if (!strcmp(res->cmd, "up"))
+		ret = power_manager_scale_mask_up(res->core_mask);
+	else if (!strcmp(res->cmd, "down"))
+		ret = power_manager_scale_mask_down(res->core_mask);
+	else if (!strcmp(res->cmd, "min"))
+		ret = power_manager_scale_mask_min(res->core_mask);
+	else if (!strcmp(res->cmd, "max"))
+		ret = power_manager_scale_mask_max(res->core_mask);
+ if (ret < 0) {
+		cmdline_printf(cl, "Error scaling core_mask(0x%"PRIx64") '%s', not "
+ "all cores specified have been scaled\n",
+ res->core_mask, res->cmd);
+	}
+}
+
+cmdline_parse_token_string_t cmd_set_cpu_freq_mask =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result,
+ set_cpu_freq_mask, "set_cpu_freq_mask");
+cmdline_parse_token_num_t cmd_set_cpu_freq_mask_core_mask =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_mask_result,
+ core_mask, UINT64);
+cmdline_parse_token_string_t cmd_set_cpu_freq_mask_result =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_mask_result,
+ cmd, "up#down#min#max");
+
+cmdline_parse_inst_t cmd_set_cpu_freq_mask_set = {
+ .f = cmd_set_cpu_freq_mask_parsed,
+ .data = NULL,
+	.help_str = "set_cpu_freq_mask <core_mask> <up|down|min|max>, Set the current "
+ "frequency for the cores specified in <core_mask> by scaling "
+ "each up/down/min/max.",
+ .tokens = {
+ (void *)&cmd_set_cpu_freq_mask,
+ (void *)&cmd_set_cpu_freq_mask_core_mask,
+ (void *)&cmd_set_cpu_freq_mask_result,
+ NULL,
+ },
+};
+
+
+
+struct cmd_show_cpu_freq_result {
+ cmdline_fixed_string_t show_cpu_freq;
+ uint8_t core_num;
+};
+
+static void
+cmd_show_cpu_freq_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ struct cmd_show_cpu_freq_result *res = parsed_result;
+ uint32_t curr_freq = power_manager_get_current_frequency(res->core_num);
+
+ if (curr_freq == 0) {
+ cmdline_printf(cl, "Unable to get frequency for core %u\n",
+ res->core_num);
+ return;
+ }
+	cmdline_printf(cl, "Core %u frequency: %"PRIu32"\n", res->core_num,
+ curr_freq);
+}
+
+cmdline_parse_token_string_t cmd_show_cpu_freq =
+ TOKEN_STRING_INITIALIZER(struct cmd_show_cpu_freq_result,
+ show_cpu_freq, "show_cpu_freq");
+
+cmdline_parse_token_num_t cmd_show_cpu_freq_core_num =
+ TOKEN_NUM_INITIALIZER(struct cmd_show_cpu_freq_result,
+ core_num, UINT8);
+
+cmdline_parse_inst_t cmd_show_cpu_freq_set = {
+ .f = cmd_show_cpu_freq_parsed,
+ .data = NULL,
+	.help_str = "show_cpu_freq <core_num>, Get the current frequency for the specified core",
+ .tokens = {
+ (void *)&cmd_show_cpu_freq,
+ (void *)&cmd_show_cpu_freq_core_num,
+ NULL,
+ },
+};
+
+struct cmd_set_cpu_freq_result {
+ cmdline_fixed_string_t set_cpu_freq;
+ uint8_t core_num;
+ cmdline_fixed_string_t cmd;
+};
+
+static void
+cmd_set_cpu_freq_parsed(void *parsed_result, struct cmdline *cl,
+ __attribute__((unused)) void *data)
+{
+ int ret = -1;
+ struct cmd_set_cpu_freq_result *res = parsed_result;
+
+	if (!strcmp(res->cmd, "up"))
+		ret = power_manager_scale_core_up(res->core_num);
+	else if (!strcmp(res->cmd, "down"))
+		ret = power_manager_scale_core_down(res->core_num);
+	else if (!strcmp(res->cmd, "min"))
+		ret = power_manager_scale_core_min(res->core_num);
+	else if (!strcmp(res->cmd, "max"))
+		ret = power_manager_scale_core_max(res->core_num);
+ if (ret < 0) {
+ cmdline_printf(cl, "Error scaling core(%u) '%s'\n", res->core_num,
+ res->cmd);
+ }
+}
+
+cmdline_parse_token_string_t cmd_set_cpu_freq =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result,
+ set_cpu_freq, "set_cpu_freq");
+cmdline_parse_token_num_t cmd_set_cpu_freq_core_num =
+ TOKEN_NUM_INITIALIZER(struct cmd_set_cpu_freq_result,
+ core_num, UINT8);
+cmdline_parse_token_string_t cmd_set_cpu_freq_cmd_cmd =
+ TOKEN_STRING_INITIALIZER(struct cmd_set_cpu_freq_result,
+ cmd, "up#down#min#max");
+
+cmdline_parse_inst_t cmd_set_cpu_freq_set = {
+ .f = cmd_set_cpu_freq_parsed,
+ .data = NULL,
+ .help_str = "set_cpu_freq <core_num> <up|down|min|max>, Set the current "
+ "frequency for the specified core by scaling up/down/min/max",
+ .tokens = {
+ (void *)&cmd_set_cpu_freq,
+ (void *)&cmd_set_cpu_freq_core_num,
+ (void *)&cmd_set_cpu_freq_cmd_cmd,
+ NULL,
+ },
+};
+
+cmdline_parse_ctx_t main_ctx[] = {
+ (cmdline_parse_inst_t *)&cmd_quit,
+ (cmdline_parse_inst_t *)&cmd_vm_op_set,
+ (cmdline_parse_inst_t *)&cmd_channels_op_set,
+ (cmdline_parse_inst_t *)&cmd_channels_status_op_set,
+ (cmdline_parse_inst_t *)&cmd_show_vm_set,
+ (cmdline_parse_inst_t *)&cmd_show_cpu_freq_mask_set,
+ (cmdline_parse_inst_t *)&cmd_set_cpu_freq_mask_set,
+ (cmdline_parse_inst_t *)&cmd_show_cpu_freq_set,
+ (cmdline_parse_inst_t *)&cmd_set_cpu_freq_set,
+ (cmdline_parse_inst_t *)&cmd_set_pcpu_mask_set,
+ (cmdline_parse_inst_t *)&cmd_set_pcpu_set,
+ NULL,
+};
+
+void
+run_cli(__attribute__((unused)) void *arg)
+{
+ struct cmdline *cl;
+
+ cl = cmdline_stdin_new(main_ctx, "vmpower> ");
+ if (cl == NULL)
+ return;
+
+ cmdline_interact(cl);
+ cmdline_stdin_exit(cl);
+}
diff --git a/examples/vm_power_manager/vm_power_cli.h b/examples/vm_power_manager/vm_power_cli.h
new file mode 100644
index 00000000..deccd513
--- /dev/null
+++ b/examples/vm_power_manager/vm_power_cli.h
@@ -0,0 +1,47 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef VM_POWER_CLI_H_
+#define VM_POWER_CLI_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void run_cli(__attribute__((unused)) void *arg);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* VM_POWER_CLI_H_ */
diff --git a/examples/vmdq/Makefile b/examples/vmdq/Makefile
new file mode 100644
index 00000000..198e3bfe
--- /dev/null
+++ b/examples/vmdq/Makefile
@@ -0,0 +1,51 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vmdq_app
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+EXTRA_CFLAGS += -O3
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vmdq/main.c b/examples/vmdq/main.c
new file mode 100644
index 00000000..178af2f5
--- /dev/null
+++ b/examples/vmdq/main.c
@@ -0,0 +1,641 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+
+#define MAX_QUEUES 1024
+/*
+ * 1024 queues are required to meet the needs of a large number of vmdq_pools.
+ * The mbuf pool is sized per port as (RX/TX_queue_nb * RX/TX_ring_descriptors_nb).
+ */
+#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
+ RTE_TEST_TX_DESC_DEFAULT))
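+/*
+ * With the defaults below (1024 queues, 512 descriptors as the larger ring
+ * size), this works out to 1024 * 512 = 524288 mbufs per port.
+ */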
+#define MBUF_CACHE_SIZE 64
+
+#define MAX_PKT_BURST 32
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+#define INVALID_PORT_ID 0xFF
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+
+/* number of pools (if the user does not specify any, 8 by default) */
+static uint32_t num_queues = 8;
+static uint32_t num_pools = 8;
+
+/* empty vmdq configuration structure. Filled in programmatically */
+static const struct rte_eth_conf vmdq_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_VMDQ_ONLY,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ },
+
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_NONE,
+ },
+ .rx_adv_conf = {
+ /*
+ * should be overridden separately in code with
+ * appropriate values
+ */
+ .vmdq_rx_conf = {
+ .nb_queue_pools = ETH_8_POOLS,
+ .enable_default_pool = 0,
+ .default_pool = 0,
+ .nb_pool_maps = 0,
+ .pool_map = {{0, 0},},
+ },
+ },
+};
+
+static unsigned lcore_ids[RTE_MAX_LCORE];
+static uint8_t ports[RTE_MAX_ETHPORTS];
+static unsigned num_ports; /**< The number of ports specified on the command line */
+
+/* array used for printing out statistics */
+volatile unsigned long rxPackets[MAX_QUEUES] = {0};
+
+const uint16_t vlan_tags[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31,
+ 32, 33, 34, 35, 36, 37, 38, 39,
+ 40, 41, 42, 43, 44, 45, 46, 47,
+ 48, 49, 50, 51, 52, 53, 54, 55,
+ 56, 57, 58, 59, 60, 61, 62, 63,
+};
+const uint16_t num_vlans = RTE_DIM(vlan_tags);
+static uint16_t num_pf_queues, num_vmdq_queues;
+static uint16_t vmdq_pool_base, vmdq_queue_base;
+/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
+static struct ether_addr pool_addr_template = {
+ .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
+};
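+/*
+ * For example, pool 3 on port 0 gets MAC 52:54:00:12:00:03; the last two
+ * bytes are filled in with the port and pool numbers in port_init().
+ */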
+
+/* ethernet addresses of ports */
+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+#define MAX_QUEUE_NUM_10G 128
+#define MAX_QUEUE_NUM_1G 8
+#define MAX_POOL_MAP_NUM_10G 64
+#define MAX_POOL_MAP_NUM_1G 32
+#define MAX_POOL_NUM_10G 64
+#define MAX_POOL_NUM_1G 8
+/*
+ * Builds up the correct configuration for vmdq based on the vlan tags array
+ * given above, and determines the queue number and pool map number according
+ * to the valid pool number
+ */
+static inline int
+get_eth_conf(struct rte_eth_conf *eth_conf, uint32_t num_pools)
+{
+ struct rte_eth_vmdq_rx_conf conf;
+ unsigned i;
+
+ conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
+ conf.nb_pool_maps = num_pools;
+ conf.enable_default_pool = 0;
+ conf.default_pool = 0; /* set explicit value, even if not used */
+
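+	/*
+	 * Each pool-map entry i associates vlan_tags[i] with pool (i % num_pools);
+	 * since nb_pool_maps equals num_pools here, tag i simply selects pool i.
+	 */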
+ for (i = 0; i < conf.nb_pool_maps; i++) {
+ conf.pool_map[i].vlan_id = vlan_tags[i];
+ conf.pool_map[i].pools = (1UL << (i % num_pools));
+ }
+
+ (void)(rte_memcpy(eth_conf, &vmdq_conf_default, sizeof(*eth_conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &conf,
+ sizeof(eth_conf->rx_adv_conf.vmdq_rx_conf)));
+ return 0;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as a parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_rxconf *rxconf;
+ struct rte_eth_conf port_conf;
+ uint16_t rxRings, txRings;
+ const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT, txRingSize = RTE_TEST_TX_DESC_DEFAULT;
+ int retval;
+ uint16_t q;
+ uint16_t queues_per_pool;
+ uint32_t max_nb_pools;
+
+ /*
+ * The max pool number from dev_info will be used to validate the pool
+	 * number specified on the command line
+ */
+ rte_eth_dev_info_get(port, &dev_info);
+ max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
+ /*
+	 * We allow processing only the subset of VMDQ pools specified by
+	 * num_pools on the command line.
+ */
+ if (num_pools > max_nb_pools) {
+		printf("num_pools %u > max_nb_pools %u\n",
+ num_pools, max_nb_pools);
+ return -1;
+ }
+ retval = get_eth_conf(&port_conf, max_nb_pools);
+ if (retval < 0)
+ return retval;
+
+ /*
+ * NIC queues are divided into pf queues and vmdq queues.
+ */
+	/* There is an assumption here that all ports have the same configuration! */
+ num_pf_queues = dev_info.max_rx_queues - dev_info.vmdq_queue_num;
+ queues_per_pool = dev_info.vmdq_queue_num / dev_info.max_vmdq_pools;
+ num_vmdq_queues = num_pools * queues_per_pool;
+ num_queues = num_pf_queues + num_vmdq_queues;
+ vmdq_queue_base = dev_info.vmdq_queue_base;
+ vmdq_pool_base = dev_info.vmdq_pool_base;
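+	/*
+	 * For example, a device reporting 128 VMDQ queues spread over 64 VMDQ
+	 * pools gives queues_per_pool = 2, so num_pools = 8 uses 16 VMDQ queues.
+	 */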
+
+ printf("pf queue num: %u, configured vmdq pool num: %u,"
+ " each vmdq pool has %u queues\n",
+ num_pf_queues, num_pools, queues_per_pool);
+ printf("vmdq queue base: %d pool base %d\n",
+ vmdq_queue_base, vmdq_pool_base);
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ /*
+	 * Though in this example we only receive packets from the first queue
+	 * of each pool and send packets through the first rte_lcore_count() TX
+	 * queues of the VMDQ queues, all queues including PF queues are set up.
+	 * This is because VMDQ queues don't always start from zero, and the
+	 * PMD layer doesn't support selectively initialising only some of the
+	 * rx/tx queues.
+ */
+ rxRings = (uint16_t)dev_info.max_rx_queues;
+ txRings = (uint16_t)dev_info.max_tx_queues;
+ retval = rte_eth_dev_configure(port, rxRings, txRings, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ rte_eth_dev_info_get(port, &dev_info);
+ rxconf = &dev_info.default_rxconf;
+ rxconf->rx_drop_en = 1;
+ for (q = 0; q < rxRings; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
+ rte_eth_dev_socket_id(port),
+ rxconf,
+ mbuf_pool);
+ if (retval < 0) {
+			printf("failed to initialise rx queue %d\n", q);
+ return retval;
+ }
+ }
+
+ for (q = 0; q < txRings; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, txRingSize,
+ rte_eth_dev_socket_id(port),
+ NULL);
+ if (retval < 0) {
+			printf("failed to initialise tx queue %d\n", q);
+ return retval;
+ }
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0) {
+ printf("port %d start failed\n", port);
+ return retval;
+ }
+
+ rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ vmdq_ports_eth_addr[port].addr_bytes[0],
+ vmdq_ports_eth_addr[port].addr_bytes[1],
+ vmdq_ports_eth_addr[port].addr_bytes[2],
+ vmdq_ports_eth_addr[port].addr_bytes[3],
+ vmdq_ports_eth_addr[port].addr_bytes[4],
+ vmdq_ports_eth_addr[port].addr_bytes[5]);
+
+ /*
+ * Set mac for each pool.
+	 * There is no default MAC for the pools in i40e.
+	 * Remove this once i40e fixes the issue.
+ */
+ for (q = 0; q < num_pools; q++) {
+ struct ether_addr mac;
+ mac = pool_addr_template;
+ mac.addr_bytes[4] = port;
+ mac.addr_bytes[5] = q;
+ printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
+ port, q,
+ mac.addr_bytes[0], mac.addr_bytes[1],
+ mac.addr_bytes[2], mac.addr_bytes[3],
+ mac.addr_bytes[4], mac.addr_bytes[5]);
+ retval = rte_eth_dev_mac_addr_add(port, &mac,
+ q + vmdq_pool_base);
+ if (retval) {
+ printf("mac addr add failed at pool %d\n", q);
+ return retval;
+ }
+ }
+
+ return 0;
+}
+
+/* Check num_pools parameter and set it if OK*/
+static int
+vmdq_parse_num_pools(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+	if (n > num_vlans) {
+		printf("num_pools %d > num_vlans %d\n", n, num_vlans);
+ return -1;
+ }
+
+ num_pools = n;
+
+ return 0;
+}
+
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+/* Display usage */
+static void
+vmdq_usage(const char *prgname)
+{
+	printf("%s [EAL options] -- -p PORTMASK\n"
+ " --nb-pools NP: number of pools\n",
+ prgname);
+}
+
+/* Parse the arguments given on the command line of the application */
+static int
+vmdq_parse_args(int argc, char **argv)
+{
+ int opt;
+ int option_index;
+ unsigned i;
+ const char *prgname = argv[0];
+ static struct option long_option[] = {
+ {"nb-pools", required_argument, NULL, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:", long_option,
+ &option_index)) != EOF) {
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ vmdq_usage(prgname);
+ return -1;
+ }
+ break;
+ case 0:
+ if (vmdq_parse_num_pools(optarg) == -1) {
+ printf("invalid number of pools\n");
+ vmdq_usage(prgname);
+ return -1;
+ }
+ break;
+
+ default:
+ vmdq_usage(prgname);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (enabled_port_mask & (1 << i))
+ ports[num_ports++] = (uint8_t)i;
+ }
+
+ if (num_ports < 2 || num_ports % 2) {
+		printf("Current enabled port number is %u,"
+			" but it should be even and at least 2\n", num_ports);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+update_mac_address(struct rte_mbuf *m, unsigned dst_port)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
+}
+
+/* When we receive a HUP signal, print out our stats */
+static void
+sighup_handler(int signum)
+{
+ unsigned q;
+ for (q = 0; q < num_queues; q++) {
+ if (q % (num_queues/num_pools) == 0)
+ printf("\nPool %u: ", q/(num_queues/num_pools));
+ printf("%lu ", rxPackets[q]);
+ }
+ printf("\nFinished handling signal %d\n", signum);
+}
+
+/*
+ * Main thread that does the work, reading from an input port
+ * and writing to its paired output port
+ */
+static int
+lcore_main(__attribute__((__unused__)) void *dummy)
+{
+ const uint16_t lcore_id = (uint16_t)rte_lcore_id();
+ const uint16_t num_cores = (uint16_t)rte_lcore_count();
+ uint16_t core_id = 0;
+ uint16_t startQueue, endQueue;
+ uint16_t q, i, p;
+ const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
+
+ for (i = 0; i < num_cores; i++)
+ if (lcore_ids[i] == lcore_id) {
+ core_id = i;
+ break;
+ }
+
+ if (remainder != 0) {
+ if (core_id < remainder) {
+ startQueue = (uint16_t)(core_id *
+ (num_vmdq_queues / num_cores + 1));
+ endQueue = (uint16_t)(startQueue +
+ (num_vmdq_queues / num_cores) + 1);
+ } else {
+ startQueue = (uint16_t)(core_id *
+ (num_vmdq_queues / num_cores) +
+ remainder);
+ endQueue = (uint16_t)(startQueue +
+ (num_vmdq_queues / num_cores));
+ }
+ } else {
+ startQueue = (uint16_t)(core_id *
+ (num_vmdq_queues / num_cores));
+ endQueue = (uint16_t)(startQueue +
+ (num_vmdq_queues / num_cores));
+ }
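+	/*
+	 * The VMDQ queues are split as evenly as possible across the cores:
+	 * e.g. with 16 VMDQ queues and 3 cores, core 0 reads queues 0-5 and
+	 * cores 1 and 2 read queues 6-10 and 11-15 respectively (before the
+	 * vmdq_queue_base offset below is applied).
+	 */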
+
+ /* vmdq queue idx doesn't always start from zero.*/
+ startQueue += vmdq_queue_base;
+ endQueue += vmdq_queue_base;
+ printf("core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_id,
+ (unsigned)lcore_id, startQueue, endQueue - 1);
+
+ if (startQueue == endQueue) {
+ printf("lcore %u has nothing to do\n", lcore_id);
+ return 0;
+ }
+
+ for (;;) {
+ struct rte_mbuf *buf[MAX_PKT_BURST];
+ const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
+
+ for (p = 0; p < num_ports; p++) {
+ const uint8_t sport = ports[p];
+ /* 0 <-> 1, 2 <-> 3 etc */
+ const uint8_t dport = ports[p ^ 1];
+ if ((sport == INVALID_PORT_ID) || (dport == INVALID_PORT_ID))
+ continue;
+
+ for (q = startQueue; q < endQueue; q++) {
+ const uint16_t rxCount = rte_eth_rx_burst(sport,
+ q, buf, buf_size);
+
+ if (unlikely(rxCount == 0))
+ continue;
+
+ rxPackets[q] += rxCount;
+
+ for (i = 0; i < rxCount; i++)
+ update_mac_address(buf[i], dport);
+
+ const uint16_t txCount = rte_eth_tx_burst(dport,
+ vmdq_queue_base + core_id,
+ buf,
+ rxCount);
+
+ if (txCount != rxCount) {
+ for (i = txCount; i < rxCount; i++)
+ rte_pktmbuf_free(buf[i]);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Update the global variable num_ports and the ports[] array according to the
+ * number of ports on the system, and return the number of valid ports
+ */
+static unsigned check_ports_num(unsigned nb_ports)
+{
+ unsigned valid_num_ports = num_ports;
+ unsigned portid;
+
+ if (num_ports > nb_ports) {
+ printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
+ num_ports, nb_ports);
+ num_ports = nb_ports;
+ }
+
+ for (portid = 0; portid < num_ports; portid++) {
+ if (ports[portid] >= nb_ports) {
+ printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
+ ports[portid], (nb_ports - 1));
+ ports[portid] = INVALID_PORT_ID;
+ valid_num_ports--;
+ }
+ }
+ return valid_num_ports;
+}
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ struct rte_mempool *mbuf_pool;
+ unsigned lcore_id, core_id = 0;
+ int ret;
+ unsigned nb_ports, valid_num_ports;
+ uint8_t portid;
+
+ signal(SIGHUP, sighup_handler);
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse app arguments */
+ ret = vmdq_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
+
+ for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++)
+ if (rte_lcore_is_enabled(lcore_id))
+ lcore_ids[core_id++] = lcore_id;
+
+ if (rte_lcore_count() > RTE_MAX_LCORE)
+ rte_exit(EXIT_FAILURE, "Not enough cores\n");
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+	 * Update the global variable num_ports and the global ports[] array,
+	 * and get the number of valid ports according to the system port count
+ */
+ valid_num_ports = check_ports_num(nb_ports);
+
+ if (valid_num_ports < 2 || valid_num_ports % 2) {
+		printf("Current number of valid ports is %u\n", valid_num_ports);
+		rte_exit(EXIT_FAILURE,
+			"Error: the number of valid ports must be even and at least 2\n");
+ }
+
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
+ }
+
+ /* call lcore_main() on every lcore */
+ rte_eal_mp_remote_launch(lcore_main, NULL, CALL_MASTER);
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ if (rte_eal_wait_lcore(lcore_id) < 0)
+ return -1;
+ }
+
+ return 0;
+}
diff --git a/examples/vmdq_dcb/Makefile b/examples/vmdq_dcb/Makefile
new file mode 100644
index 00000000..8c51131b
--- /dev/null
+++ b/examples/vmdq_dcb/Makefile
@@ -0,0 +1,59 @@
+# BSD LICENSE
+#
+# Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+ifeq ($(RTE_SDK),)
+$(error "Please define RTE_SDK environment variable")
+endif
+
+# Default target, can be overridden by command line or environment
+RTE_TARGET ?= x86_64-native-linuxapp-gcc
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+# binary name
+APP = vmdq_dcb_app
+
+# all sources are stored in SRCS-y
+SRCS-y := main.c
+
+CFLAGS += $(WERROR_FLAGS)
+
+# workaround for a gcc bug with noreturn attribute
+# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603
+ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y)
+CFLAGS_main.o += -Wno-return-type
+endif
+ifeq ($(CONFIG_RTE_TOOLCHAIN_ICC),y)
+CFLAGS_main.o += -diag-disable=vec
+endif
+EXTRA_CFLAGS += -O3 -g
+
+include $(RTE_SDK)/mk/rte.extapp.mk
diff --git a/examples/vmdq_dcb/main.c b/examples/vmdq_dcb/main.c
new file mode 100644
index 00000000..62e1422a
--- /dev/null
+++ b/examples/vmdq_dcb/main.c
@@ -0,0 +1,705 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2014 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <sys/queue.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <inttypes.h>
+#include <getopt.h>
+
+#include <rte_common.h>
+#include <rte_log.h>
+#include <rte_memory.h>
+#include <rte_memcpy.h>
+#include <rte_memzone.h>
+#include <rte_eal.h>
+#include <rte_per_lcore.h>
+#include <rte_launch.h>
+#include <rte_atomic.h>
+#include <rte_cycles.h>
+#include <rte_prefetch.h>
+#include <rte_lcore.h>
+#include <rte_per_lcore.h>
+#include <rte_branch_prediction.h>
+#include <rte_interrupts.h>
+#include <rte_pci.h>
+#include <rte_random.h>
+#include <rte_debug.h>
+#include <rte_ether.h>
+#include <rte_ethdev.h>
+#include <rte_ring.h>
+#include <rte_log.h>
+#include <rte_mempool.h>
+#include <rte_mbuf.h>
+#include <rte_memcpy.h>
+
+/* basic constants used in application */
+#define MAX_QUEUES 1024
+/*
+ * 1024 queues are required to meet the needs of a large number of vmdq_pools.
+ * The mbuf pool is sized per port as (RX/TX_queue_nb * RX/TX_ring_descriptors_nb).
+ */
+#define NUM_MBUFS_PER_PORT (MAX_QUEUES * RTE_MAX(RTE_TEST_RX_DESC_DEFAULT, \
+ RTE_TEST_TX_DESC_DEFAULT))
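+/*
+ * With the defaults below (1024 queues, 512 descriptors as the larger ring
+ * size), this works out to 1024 * 512 = 524288 mbufs per port.
+ */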
+#define MBUF_CACHE_SIZE 64
+
+#define MAX_PKT_BURST 32
+
+/*
+ * Configurable number of RX/TX ring descriptors
+ */
+#define RTE_TEST_RX_DESC_DEFAULT 128
+#define RTE_TEST_TX_DESC_DEFAULT 512
+
+#define INVALID_PORT_ID 0xFF
+
+/* mask of enabled ports */
+static uint32_t enabled_port_mask;
+static uint8_t ports[RTE_MAX_ETHPORTS];
+static unsigned num_ports;
+
+/* number of pools (if the user does not specify any, 32 by default) */
+static enum rte_eth_nb_pools num_pools = ETH_32_POOLS;
+static enum rte_eth_nb_tcs num_tcs = ETH_4_TCS;
+static uint16_t num_queues, num_vmdq_queues;
+static uint16_t vmdq_pool_base, vmdq_queue_base;
+static uint8_t rss_enable;
+
+/* empty vmdq+dcb configuration structure. Filled in programmatically */
+static const struct rte_eth_conf vmdq_dcb_conf_default = {
+ .rxmode = {
+ .mq_mode = ETH_MQ_RX_VMDQ_DCB,
+ .split_hdr_size = 0,
+ .header_split = 0, /**< Header Split disabled */
+ .hw_ip_checksum = 0, /**< IP checksum offload disabled */
+ .hw_vlan_filter = 0, /**< VLAN filtering disabled */
+ .jumbo_frame = 0, /**< Jumbo Frame Support disabled */
+ },
+ .txmode = {
+ .mq_mode = ETH_MQ_TX_VMDQ_DCB,
+ },
+ /*
+ * should be overridden separately in code with
+ * appropriate values
+ */
+ .rx_adv_conf = {
+ .vmdq_dcb_conf = {
+ .nb_queue_pools = ETH_32_POOLS,
+ .enable_default_pool = 0,
+ .default_pool = 0,
+ .nb_pool_maps = 0,
+ .pool_map = {{0, 0},},
+ .dcb_tc = {0},
+ },
+ .dcb_rx_conf = {
+ .nb_tcs = ETH_4_TCS,
+ /** Traffic class each UP mapped to. */
+ .dcb_tc = {0},
+ },
+ .vmdq_rx_conf = {
+ .nb_queue_pools = ETH_32_POOLS,
+ .enable_default_pool = 0,
+ .default_pool = 0,
+ .nb_pool_maps = 0,
+ .pool_map = {{0, 0},},
+ },
+ },
+ .tx_adv_conf = {
+ .vmdq_dcb_tx_conf = {
+ .nb_queue_pools = ETH_32_POOLS,
+ .dcb_tc = {0},
+ },
+ },
+};
+
+/* array used for printing out statistics */
+volatile unsigned long rxPackets[MAX_QUEUES] = {0};
+
+const uint16_t vlan_tags[] = {
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ 16, 17, 18, 19, 20, 21, 22, 23,
+ 24, 25, 26, 27, 28, 29, 30, 31
+};
+
+const uint16_t num_vlans = RTE_DIM(vlan_tags);
+/* pool mac addr template, pool mac addr is like: 52 54 00 12 port# pool# */
+static struct ether_addr pool_addr_template = {
+ .addr_bytes = {0x52, 0x54, 0x00, 0x12, 0x00, 0x00}
+};
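+/*
+ * For example, pool 3 on port 0 gets MAC 52:54:00:12:00:03; the last two
+ * bytes are filled in with the port and pool numbers in port_init().
+ */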
+
+/* ethernet addresses of ports */
+static struct ether_addr vmdq_ports_eth_addr[RTE_MAX_ETHPORTS];
+
+/* Builds up the correct configuration for vmdq+dcb based on the vlan tags array
+ * given above, and the number of traffic classes available for use. */
+static inline int
+get_eth_conf(struct rte_eth_conf *eth_conf)
+{
+ struct rte_eth_vmdq_dcb_conf conf;
+ struct rte_eth_vmdq_rx_conf vmdq_conf;
+ struct rte_eth_dcb_rx_conf dcb_conf;
+ struct rte_eth_vmdq_dcb_tx_conf tx_conf;
+ uint8_t i;
+
+ conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
+ vmdq_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
+ tx_conf.nb_queue_pools = (enum rte_eth_nb_pools)num_pools;
+ conf.nb_pool_maps = num_pools;
+ vmdq_conf.nb_pool_maps = num_pools;
+ conf.enable_default_pool = 0;
+ vmdq_conf.enable_default_pool = 0;
+ conf.default_pool = 0; /* set explicit value, even if not used */
+ vmdq_conf.default_pool = 0;
+
+ for (i = 0; i < conf.nb_pool_maps; i++) {
+ conf.pool_map[i].vlan_id = vlan_tags[i];
+ vmdq_conf.pool_map[i].vlan_id = vlan_tags[i];
+ conf.pool_map[i].pools = 1UL << i;
+ vmdq_conf.pool_map[i].pools = 1UL << i;
+ }
+	for (i = 0; i < ETH_DCB_NUM_USER_PRIORITIES; i++) {
+ conf.dcb_tc[i] = i % num_tcs;
+ dcb_conf.dcb_tc[i] = i % num_tcs;
+ tx_conf.dcb_tc[i] = i % num_tcs;
+ }
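+	/*
+	 * Each of the ETH_DCB_NUM_USER_PRIORITIES (8) user priorities is mapped
+	 * to traffic class (i % num_tcs); e.g. with 4 TCs, priorities 0-7 map
+	 * to TCs 0,1,2,3,0,1,2,3.
+	 */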
+ dcb_conf.nb_tcs = (enum rte_eth_nb_tcs)num_tcs;
+ (void)(rte_memcpy(eth_conf, &vmdq_dcb_conf_default, sizeof(*eth_conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_dcb_conf, &conf,
+ sizeof(conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.dcb_rx_conf, &dcb_conf,
+ sizeof(dcb_conf)));
+ (void)(rte_memcpy(&eth_conf->rx_adv_conf.vmdq_rx_conf, &vmdq_conf,
+ sizeof(vmdq_conf)));
+ (void)(rte_memcpy(&eth_conf->tx_adv_conf.vmdq_dcb_tx_conf, &tx_conf,
+ sizeof(tx_conf)));
+ if (rss_enable) {
+ eth_conf->rxmode.mq_mode = ETH_MQ_RX_VMDQ_DCB_RSS;
+ eth_conf->rx_adv_conf.rss_conf.rss_hf = ETH_RSS_IP |
+ ETH_RSS_UDP |
+ ETH_RSS_TCP |
+ ETH_RSS_SCTP;
+ }
+ return 0;
+}
+
+/*
+ * Initialises a given port using global settings and with the rx buffers
+ * coming from the mbuf_pool passed as a parameter
+ */
+static inline int
+port_init(uint8_t port, struct rte_mempool *mbuf_pool)
+{
+ struct rte_eth_dev_info dev_info;
+ struct rte_eth_conf port_conf = {0};
+ const uint16_t rxRingSize = RTE_TEST_RX_DESC_DEFAULT;
+ const uint16_t txRingSize = RTE_TEST_TX_DESC_DEFAULT;
+ int retval;
+ uint16_t q;
+ uint16_t queues_per_pool;
+ uint32_t max_nb_pools;
+
+ /*
+ * The max pool number from dev_info will be used to validate the pool
+	 * number specified on the command line
+ */
+ rte_eth_dev_info_get(port, &dev_info);
+ max_nb_pools = (uint32_t)dev_info.max_vmdq_pools;
+ /*
+	 * We allow processing only the subset of VMDQ pools specified by
+	 * num_pools on the command line.
+ */
+ if (num_pools > max_nb_pools) {
+		printf("num_pools %d > max_nb_pools %d\n",
+ num_pools, max_nb_pools);
+ return -1;
+ }
+
+ /*
+ * NIC queues are divided into pf queues and vmdq queues.
+	 * There is an assumption here that all ports have the same configuration!
+ */
+ vmdq_queue_base = dev_info.vmdq_queue_base;
+ vmdq_pool_base = dev_info.vmdq_pool_base;
+ printf("vmdq queue base: %d pool base %d\n",
+ vmdq_queue_base, vmdq_pool_base);
+ if (vmdq_pool_base == 0) {
+ num_vmdq_queues = dev_info.max_rx_queues;
+ num_queues = dev_info.max_rx_queues;
+ if (num_tcs != num_vmdq_queues / num_pools) {
+			printf("nb_tcs %d is invalid: with nb_pools %d,"
+				" nb_tcs * nb_pools should equal %d\n",
+ num_tcs, num_pools, num_vmdq_queues);
+ return -1;
+ }
+ } else {
+ queues_per_pool = dev_info.vmdq_queue_num /
+ dev_info.max_vmdq_pools;
+ if (num_tcs > queues_per_pool) {
+ printf("num_tcs %d > num of queues per pool %d\n",
+ num_tcs, queues_per_pool);
+ return -1;
+ }
+ num_vmdq_queues = num_pools * queues_per_pool;
+ num_queues = vmdq_queue_base + num_vmdq_queues;
+ printf("Configured vmdq pool num: %u,"
+ " each vmdq pool has %u queues\n",
+ num_pools, queues_per_pool);
+ }
+
+ if (port >= rte_eth_dev_count())
+ return -1;
+
+ retval = get_eth_conf(&port_conf);
+ if (retval < 0)
+ return retval;
+
+ /*
+	 * In this example all queues, including the PF queues, are set up.
+	 * This is because VMDQ queues don't always start from zero, and the
+	 * PMD layer doesn't support selectively initialising only some of the
+	 * rx/tx queues.
+ */
+ retval = rte_eth_dev_configure(port, num_queues, num_queues, &port_conf);
+ if (retval != 0)
+ return retval;
+
+ for (q = 0; q < num_queues; q++) {
+ retval = rte_eth_rx_queue_setup(port, q, rxRingSize,
+ rte_eth_dev_socket_id(port),
+ NULL,
+ mbuf_pool);
+ if (retval < 0) {
+			printf("failed to initialize rx queue %d\n", q);
+ return retval;
+ }
+ }
+
+ for (q = 0; q < num_queues; q++) {
+ retval = rte_eth_tx_queue_setup(port, q, txRingSize,
+ rte_eth_dev_socket_id(port),
+ NULL);
+ if (retval < 0) {
+			printf("failed to initialize tx queue %d\n", q);
+ return retval;
+ }
+ }
+
+ retval = rte_eth_dev_start(port);
+ if (retval < 0) {
+ printf("port %d start failed\n", port);
+ return retval;
+ }
+
+ rte_eth_macaddr_get(port, &vmdq_ports_eth_addr[port]);
+ printf("Port %u MAC: %02"PRIx8" %02"PRIx8" %02"PRIx8
+ " %02"PRIx8" %02"PRIx8" %02"PRIx8"\n",
+ (unsigned)port,
+ vmdq_ports_eth_addr[port].addr_bytes[0],
+ vmdq_ports_eth_addr[port].addr_bytes[1],
+ vmdq_ports_eth_addr[port].addr_bytes[2],
+ vmdq_ports_eth_addr[port].addr_bytes[3],
+ vmdq_ports_eth_addr[port].addr_bytes[4],
+ vmdq_ports_eth_addr[port].addr_bytes[5]);
+
+ /* Set mac for each pool.*/
+ for (q = 0; q < num_pools; q++) {
+ struct ether_addr mac;
+
+ mac = pool_addr_template;
+ mac.addr_bytes[4] = port;
+ mac.addr_bytes[5] = q;
+ printf("Port %u vmdq pool %u set mac %02x:%02x:%02x:%02x:%02x:%02x\n",
+ port, q,
+ mac.addr_bytes[0], mac.addr_bytes[1],
+ mac.addr_bytes[2], mac.addr_bytes[3],
+ mac.addr_bytes[4], mac.addr_bytes[5]);
+ retval = rte_eth_dev_mac_addr_add(port, &mac,
+ q + vmdq_pool_base);
+ if (retval) {
+ printf("mac addr add failed at pool %d\n", q);
+ return retval;
+ }
+ }
+
+ return 0;
+}
+
+/* Check num_pools parameter and set it if OK*/
+static int
+vmdq_parse_num_pools(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+ if (n != 16 && n != 32)
+ return -1;
+ if (n == 16)
+ num_pools = ETH_16_POOLS;
+ else
+ num_pools = ETH_32_POOLS;
+
+ return 0;
+}
+
+/* Check num_tcs parameter and set it if OK*/
+static int
+vmdq_parse_num_tcs(const char *q_arg)
+{
+ char *end = NULL;
+ int n;
+
+ /* parse number string */
+ n = strtol(q_arg, &end, 10);
+ if ((q_arg[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (n != 4 && n != 8)
+ return -1;
+ if (n == 4)
+ num_tcs = ETH_4_TCS;
+ else
+ num_tcs = ETH_8_TCS;
+
+ return 0;
+}
+
+static int
+parse_portmask(const char *portmask)
+{
+ char *end = NULL;
+ unsigned long pm;
+
+ /* parse hexadecimal string */
+ pm = strtoul(portmask, &end, 16);
+ if ((portmask[0] == '\0') || (end == NULL) || (*end != '\0'))
+ return -1;
+
+ if (pm == 0)
+ return -1;
+
+ return pm;
+}
+
+/* Display usage */
+static void
+vmdq_usage(const char *prgname)
+{
+	printf("%s [EAL options] -- -p PORTMASK\n"
+	       " --nb-pools NP: number of pools (default 32, or 16)\n"
+	       " --nb-tcs NP: number of TCs (default 4, or 8)\n"
+ " --enable-rss: enable RSS (disabled by default)\n",
+ prgname);
+}
+
+/* Parse the arguments given on the command line of the application */
+static int
+vmdq_parse_args(int argc, char **argv)
+{
+ int opt;
+ int option_index;
+ unsigned i;
+ const char *prgname = argv[0];
+ static struct option long_option[] = {
+ {"nb-pools", required_argument, NULL, 0},
+ {"nb-tcs", required_argument, NULL, 0},
+ {"enable-rss", 0, NULL, 0},
+ {NULL, 0, 0, 0}
+ };
+
+ /* Parse command line */
+ while ((opt = getopt_long(argc, argv, "p:", long_option,
+ &option_index)) != EOF) {
+ switch (opt) {
+ /* portmask */
+ case 'p':
+ enabled_port_mask = parse_portmask(optarg);
+ if (enabled_port_mask == 0) {
+ printf("invalid portmask\n");
+ vmdq_usage(prgname);
+ return -1;
+ }
+ break;
+ case 0:
+ if (!strcmp(long_option[option_index].name, "nb-pools")) {
+ if (vmdq_parse_num_pools(optarg) == -1) {
+ printf("invalid number of pools\n");
+ return -1;
+ }
+ }
+
+ if (!strcmp(long_option[option_index].name, "nb-tcs")) {
+ if (vmdq_parse_num_tcs(optarg) == -1) {
+ printf("invalid number of tcs\n");
+ return -1;
+ }
+ }
+
+ if (!strcmp(long_option[option_index].name, "enable-rss"))
+ rss_enable = 1;
+ break;
+
+ default:
+ vmdq_usage(prgname);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < RTE_MAX_ETHPORTS; i++) {
+ if (enabled_port_mask & (1 << i))
+ ports[num_ports++] = (uint8_t)i;
+ }
+
+ if (num_ports < 2 || num_ports % 2) {
+ printf("Current enabled port number is %u,"
+ " but it should be even and at least 2\n", num_ports);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void
+update_mac_address(struct rte_mbuf *m, unsigned dst_port)
+{
+ struct ether_hdr *eth;
+ void *tmp;
+
+ eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ /* 02:00:00:00:00:xx */
+ tmp = &eth->d_addr.addr_bytes[0];
+ *((uint64_t *)tmp) = 0x000000000002 + ((uint64_t)dst_port << 40);
+
+ /* src addr */
+ ether_addr_copy(&vmdq_ports_eth_addr[dst_port], &eth->s_addr);
+}
+
+/* When we receive a HUP signal, print out our stats */
+static void
+sighup_handler(int signum)
+{
+ unsigned q = vmdq_queue_base;
+
+ for (; q < num_queues; q++) {
+ if (q % (num_vmdq_queues / num_pools) == 0)
+ printf("\nPool %u: ", (q - vmdq_queue_base) /
+ (num_vmdq_queues / num_pools));
+ printf("%lu ", rxPackets[q]);
+ }
+ printf("\nFinished handling signal %d\n", signum);
+}
+
+/*
+ * Main thread that does the work, reading from an input port
+ * and writing to its paired output port
+ */
+static int
+lcore_main(void *arg)
+{
+ const uintptr_t core_num = (uintptr_t)arg;
+ const unsigned num_cores = rte_lcore_count();
+ uint16_t startQueue, endQueue;
+ uint16_t q, i, p;
+ const uint16_t quot = (uint16_t)(num_vmdq_queues / num_cores);
+ const uint16_t remainder = (uint16_t)(num_vmdq_queues % num_cores);
+
+
+ if (remainder) {
+ if (core_num < remainder) {
+ startQueue = (uint16_t)(core_num * (quot + 1));
+ endQueue = (uint16_t)(startQueue + quot + 1);
+ } else {
+ startQueue = (uint16_t)(core_num * quot + remainder);
+ endQueue = (uint16_t)(startQueue + quot);
+ }
+ } else {
+ startQueue = (uint16_t)(core_num * quot);
+ endQueue = (uint16_t)(startQueue + quot);
+ }
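+	/*
+	 * The VMDQ queues are split as evenly as possible across the cores:
+	 * e.g. with 16 VMDQ queues and 3 cores, core 0 reads queues 0-5 and
+	 * cores 1 and 2 read queues 6-10 and 11-15 respectively (before the
+	 * vmdq_queue_base offset below is applied).
+	 */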
+
+ /* vmdq queue idx doesn't always start from zero.*/
+ startQueue += vmdq_queue_base;
+ endQueue += vmdq_queue_base;
+ printf("Core %u(lcore %u) reading queues %i-%i\n", (unsigned)core_num,
+ rte_lcore_id(), startQueue, endQueue - 1);
+
+ if (startQueue == endQueue) {
+ printf("lcore %u has nothing to do\n", (unsigned)core_num);
+ return 0;
+ }
+
+ for (;;) {
+ struct rte_mbuf *buf[MAX_PKT_BURST];
+ const uint16_t buf_size = sizeof(buf) / sizeof(buf[0]);
+ for (p = 0; p < num_ports; p++) {
+ const uint8_t src = ports[p];
+ const uint8_t dst = ports[p ^ 1]; /* 0 <-> 1, 2 <-> 3 etc */
+
+ if ((src == INVALID_PORT_ID) || (dst == INVALID_PORT_ID))
+ continue;
+
+ for (q = startQueue; q < endQueue; q++) {
+ const uint16_t rxCount = rte_eth_rx_burst(src,
+ q, buf, buf_size);
+
+ if (unlikely(rxCount == 0))
+ continue;
+
+ rxPackets[q] += rxCount;
+
+ for (i = 0; i < rxCount; i++)
+ update_mac_address(buf[i], dst);
+
+ const uint16_t txCount = rte_eth_tx_burst(dst,
+ q, buf, rxCount);
+ if (txCount != rxCount) {
+ for (i = txCount; i < rxCount; i++)
+ rte_pktmbuf_free(buf[i]);
+ }
+ }
+ }
+ }
+}
+
+/*
+ * Update the global variable num_ports and the ports[] array according to the
+ * number of ports on the system, and return the number of valid ports
+ */
+static unsigned check_ports_num(unsigned nb_ports)
+{
+ unsigned valid_num_ports = num_ports;
+ unsigned portid;
+
+ if (num_ports > nb_ports) {
+ printf("\nSpecified port number(%u) exceeds total system port number(%u)\n",
+ num_ports, nb_ports);
+ num_ports = nb_ports;
+ }
+
+ for (portid = 0; portid < num_ports; portid++) {
+ if (ports[portid] >= nb_ports) {
+ printf("\nSpecified port ID(%u) exceeds max system port ID(%u)\n",
+ ports[portid], (nb_ports - 1));
+ ports[portid] = INVALID_PORT_ID;
+ valid_num_ports--;
+ }
+ }
+ return valid_num_ports;
+}
+
+
+/* Main function, does initialisation and calls the per-lcore functions */
+int
+main(int argc, char *argv[])
+{
+ unsigned cores;
+ struct rte_mempool *mbuf_pool;
+ unsigned lcore_id;
+ uintptr_t i;
+ int ret;
+ unsigned nb_ports, valid_num_ports;
+ uint8_t portid;
+
+ signal(SIGHUP, sighup_handler);
+
+ /* init EAL */
+ ret = rte_eal_init(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Error with EAL initialization\n");
+ argc -= ret;
+ argv += ret;
+
+ /* parse app arguments */
+ ret = vmdq_parse_args(argc, argv);
+ if (ret < 0)
+ rte_exit(EXIT_FAILURE, "Invalid VMDQ argument\n");
+
+ cores = rte_lcore_count();
+ if ((cores & (cores - 1)) != 0 || cores > RTE_MAX_LCORE) {
+		rte_exit(EXIT_FAILURE, "This program can only run on a"
+			" power-of-two number of cores (1-%d)\n\n", RTE_MAX_LCORE);
+ }
+
+ nb_ports = rte_eth_dev_count();
+ if (nb_ports > RTE_MAX_ETHPORTS)
+ nb_ports = RTE_MAX_ETHPORTS;
+
+ /*
+	 * Update the global variable num_ports and the global ports[] array,
+	 * and get the number of valid ports according to the system port count
+ */
+ valid_num_ports = check_ports_num(nb_ports);
+
+ if (valid_num_ports < 2 || valid_num_ports % 2) {
+		printf("Current number of valid ports is %u\n", valid_num_ports);
+		rte_exit(EXIT_FAILURE,
+			"Error: the number of valid ports must be even and at least 2\n");
+ }
+
+ mbuf_pool = rte_pktmbuf_pool_create("MBUF_POOL",
+ NUM_MBUFS_PER_PORT * nb_ports, MBUF_CACHE_SIZE,
+ 0, RTE_MBUF_DEFAULT_BUF_SIZE, rte_socket_id());
+ if (mbuf_pool == NULL)
+ rte_exit(EXIT_FAILURE, "Cannot create mbuf pool\n");
+
+ /* initialize all ports */
+ for (portid = 0; portid < nb_ports; portid++) {
+ /* skip ports that are not enabled */
+ if ((enabled_port_mask & (1 << portid)) == 0) {
+ printf("\nSkipping disabled port %d\n", portid);
+ continue;
+ }
+ if (port_init(portid, mbuf_pool) != 0)
+ rte_exit(EXIT_FAILURE, "Cannot initialize network ports\n");
+ }
+
+ /* call lcore_main() on every slave lcore */
+ i = 0;
+ RTE_LCORE_FOREACH_SLAVE(lcore_id) {
+ rte_eal_remote_launch(lcore_main, (void*)i++, lcore_id);
+ }
+ /* call on master too */
+ (void) lcore_main((void*)i);
+
+ return 0;
+}